x86-64: Fix TLSDESC relaxation for x32

Message ID 20200118230704.297139-1-hjl.tools@gmail.com
State New
Headers show
Series
  • x86-64: Fix TLSDESC relaxation for x32
Related show

Commit Message

H.J. Lu Jan. 18, 2020, 11:07 p.m.
For X32, TLSDESC sequences can be

8d 05 00 00 00 00	lea	x@TLSDESC(%rip), %eax
67 ff 10		call	*x@TLSCALL(%eax)

or the same sequence as LP64

48 8d 05 00 00 00 00	lea	foo@TLSDESC(%rip), %rax
ff 10			call	*foo@TLSCALL(%rax)

we need to support both sequences for x32.

8d 05 00 00 00 00	lea	x@TLSDESC(%rip), %eax

should be relaxed to

c7 c0 fc ff ff ff	movl	$x@tpoff, %eax

and

67 ff 10		call	*x@TLSCALL(%eax)

should be relaxed to

0f 1f 00		nopl	(%rax)

bfd/

	PR ld/25416
	* elf64-x86-64.c (elf_x86_64_check_tls_transition): Support
	"leal x@tlsdesc(%rip), %eax" and "call *x@tlsdesc(%eax)" in
	X32 mode.
	(elf_x86_64_relocate_section): Relax "leal x@tlsdesc(%rip), %eax"
	to "movl $x@tpoff, %eax" and "call *(%eax)" to "nopl (%rax)" in
	X32 mode.

ld/

	PR ld/25416
	* testsuite/ld-x86-64/pr25416-1.d: New file.
	* testsuite/ld-x86-64/pr25416-1.s: Likewise.
	* testsuite/ld-x86-64/pr25416-2.d: Likewise.
	* testsuite/ld-x86-64/pr25416-2.s: Likewise.
	* testsuite/ld-x86-64/pr25416-3a.c: Likewise.
	* testsuite/ld-x86-64/pr25416-3b.s: Likewise.
	* testsuite/ld-x86-64/pr25416-3c.s: Likewise.
	* testsuite/ld-x86-64/x86-64.exp: Run PR ld/25416 tests.
---
 bfd/elf64-x86-64.c                  | 84 ++++++++++++++++++++++-------
 ld/testsuite/ld-x86-64/pr25416-1.d  | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-1.s  | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-2.d  | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-2.s  | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-3a.c | 27 ++++++++++
 ld/testsuite/ld-x86-64/pr25416-3b.s | 50 +++++++++++++++++
 ld/testsuite/ld-x86-64/pr25416-3c.s | 54 +++++++++++++++++++
 ld/testsuite/ld-x86-64/x86-64.exp   | 69 ++++++++++++++++++++++++
 9 files changed, 317 insertions(+), 19 deletions(-)
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3a.c
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3b.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3c.s

-- 
2.24.1

Comments

H.J. Lu Jan. 19, 2020, 2:44 a.m. | #1
On Sat, Jan 18, 2020 at 3:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> For X32, TLSDESC sequences can be

>

> 8d 05 00 00 00 00       lea     x@TLSDESC(%rip), %eax

> 67 ff 10                call    *x@TLSCALL(%eax)

>


For x32, we must encode "lea x@TLSDESC(%rip), %reg" with a REX prefix
even if it isn't required.  Otherwise the linker can't safely perform the
GDesc -> LE optimization: if the lea encoding has a variable length, the
linker can't tell where the instruction starts.  Here is the updated patch,
which makes the assembler always generate a REX prefix.


-- 
H.J.
From 85db7ff14e866426d3c350de8e885619f56f66f1 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sat, 18 Jan 2020 13:18:21 -0800
Subject: [PATCH] x86-64: Fix TLSDESC relaxation for x32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For x32, we must encode "lea x@TLSDESC(%rip), %reg" with a REX prefix
even if it isn't required.  Otherwise linker can’t safely perform
GDesc -> LE optimization.  X32 TLSDESC sequences can be:

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg
...
67 ff 10		call	*x@TLSCALL(%eax)

or the same sequence as LP64:

48 8d 05 00 00 00 00	lea	foo@TLSDESC(%rip), %reg
...
ff 10			call	*foo@TLSCALL(%rax)

We need to support both sequences for x32.

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg

should be relaxed to

40 c7 c0 fc ff ff ff	rex movl $x@tpoff, %reg

and

67 ff 10		call	*x@TLSCALL(%eax)

should be relaxed to

0f 1f 00		nopl	(%rax)

bfd/

	PR ld/25416
	* elf64-x86-64.c (elf_x86_64_check_tls_transition): Support
	"rex leal x@tlsdesc(%rip), %reg" and "call *x@tlsdesc(%eax)" in
	X32 mode.
	(elf_x86_64_relocate_section): In x32 mode, relax
	"rex leal x@tlsdesc(%rip), %reg" to "rex movl $x@tpoff, %reg"
	and "call *(%eax)" to "nopl (%rax)".

gas/

	PR ld/25416
	* config/tc-i386.c (output_insn): Add a dummy REX_OPCODE prefix
	for lea with R_X86_64_GOTPC32_TLSDESC relocation when generating
	x32 object.
	* testsuite/gas/i386/ilp32/x32-tls.d: Updated.
	* testsuite/gas/i386/ilp32/x32-tls.s: Add tests for lea with
	R_X86_64_GOTPC32_TLSDESC relocation.

ld/

	PR ld/25416
	* testsuite/ld-x86-64/pr25416-1.d: New file.
	* testsuite/ld-x86-64/pr25416-1.s: Likewise.
	* testsuite/ld-x86-64/pr25416-2.d: Likewise.
	* testsuite/ld-x86-64/pr25416-2.s: Likewise.
	* testsuite/ld-x86-64/pr25416-3a.c: Likewise.
	* testsuite/ld-x86-64/pr25416-3b.s: Likewise.
	* testsuite/ld-x86-64/pr25416-3c.s: Likewise.
	* testsuite/ld-x86-64/x86-64.exp: Run PR ld/25416 tests.
---
 bfd/elf64-x86-64.c                     | 62 ++++++++++++++++++-----
 gas/config/tc-i386.c                   |  7 ++-
 gas/testsuite/gas/i386/ilp32/x32-tls.d |  2 +
 gas/testsuite/gas/i386/ilp32/x32-tls.s |  2 +
 ld/testsuite/ld-x86-64/pr25416-1.d     | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-1.s     | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-2.d     | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-2.s     | 13 +++++
 ld/testsuite/ld-x86-64/pr25416-3a.c    | 27 ++++++++++
 ld/testsuite/ld-x86-64/pr25416-3b.s    | 50 +++++++++++++++++++
 ld/testsuite/ld-x86-64/pr25416-3c.s    | 54 ++++++++++++++++++++
 ld/testsuite/ld-x86-64/x86-64.exp      | 69 ++++++++++++++++++++++++++
 12 files changed, 311 insertions(+), 14 deletions(-)
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3a.c
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3b.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3c.s

diff --git a/bfd/elf64-x86-64.c b/bfd/elf64-x86-64.c
index 79e68ff4767..af1e1e825f3 100644
--- a/bfd/elf64-x86-64.c
+++ b/bfd/elf64-x86-64.c
@@ -1223,7 +1223,8 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_GOTPC32_TLSDESC:
       /* Check transition from GDesc access model:
-		leaq x@tlsdesc(%rip), %rax
+		leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 	 Make sure it's a leaq adding rip to a 32-bit offset
 	 into any register, although it's probably almost always
@@ -1233,7 +1234,8 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 	return FALSE;
 
       val = bfd_get_8 (abfd, contents + offset - 3);
-      if ((val & 0xfb) != 0x48)
+      val &= 0xfb;
+      if (val != 0x48 && (ABI_64_P (abfd) || val != 0x40))
 	return FALSE;
 
       if (bfd_get_8 (abfd, contents + offset - 2) != 0x8d)
@@ -1244,13 +1246,26 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_TLSDESC_CALL:
       /* Check transition from GDesc access model:
-		call *x@tlsdesc(%rax)
+		call *x@tlsdesc(%rax) <--- LP64 mode.
+		call *x@tlsdesc(%eax) <--- X32 mode.
        */
       if (offset + 2 <= sec->size)
 	{
-	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
+	  unsigned int prefix;
 	  call = contents + offset;
-	  return call[0] == 0xff && call[1] == 0x10;
+	  prefix = 0;
+	  if (!ABI_64_P (abfd))
+	    {
+	      /* Check for call *x@tlsdesc(%eax).  */
+	      if (call[0] == 0x67)
+		{
+		  prefix = 1;
+		  if (offset + 3 > sec->size)
+		    return FALSE;
+		}
+	    }
+	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
+	  return call[prefix] == 0xff && call[1 + prefix] == 0x10;
 	}
 
       return FALSE;
@@ -3401,10 +3416,13 @@ corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally something like:
-		     leaq x@tlsdesc(%rip), %rax
+		     leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		     leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 		     Change it to:
-		     movl $x@tpoff, %rax.  */
+		     movq $x@tpoff, %rax <--- LP64 mode.
+		     rex movl $x@tpoff, %eax <--- X32 mode.
+		   */
 
 		  unsigned int val, type;
 
@@ -3412,7 +3430,8 @@ corrupt_input:
 		    goto corrupt_input;
 		  type = bfd_get_8 (input_bfd, contents + roff - 3);
 		  val = bfd_get_8 (input_bfd, contents + roff - 1);
-		  bfd_put_8 (output_bfd, 0x48 | ((type >> 2) & 1),
+		  bfd_put_8 (output_bfd,
+			     (type & 0x48) | ((type >> 2) & 1),
 			     contents + roff - 3);
 		  bfd_put_8 (output_bfd, 0xc7, contents + roff - 2);
 		  bfd_put_8 (output_bfd, 0xc0 | ((val >> 3) & 7),
@@ -3426,11 +3445,30 @@ corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally:
-		     call *(%rax)
+		     call *(%rax) <--- LP64 mode.
+		     call *(%eax) <--- X32 mode.
 		     Turn it into:
-		     xchg %ax,%ax.  */
-		  bfd_put_8 (output_bfd, 0x66, contents + roff);
-		  bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		     xchg %ax,%ax <-- LP64 mode.
+		     nopl (%rax)  <-- X32 mode.
+		   */
+		  unsigned int prefix = 0;
+		  if (!ABI_64_P (input_bfd))
+		    {
+		      /* Check for call *x@tlsdesc(%eax).  */
+		      if (contents[roff] == 0x67)
+			prefix = 1;
+		    }
+		  if (prefix)
+		    {
+		      bfd_put_8 (output_bfd, 0x0f, contents + roff);
+		      bfd_put_8 (output_bfd, 0x1f, contents + roff + 1);
+		      bfd_put_8 (output_bfd, 0x00, contents + roff + 2);
+		    }
+		  else
+		    {
+		      bfd_put_8 (output_bfd, 0x66, contents + roff);
+		      bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		    }
 		  continue;
 		}
 	      else if (r_type == R_X86_64_GOTTPOFF)
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 8728725b82d..17c1e988260 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8713,10 +8713,13 @@ output_insn (void)
 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
 	  /* For x32, add a dummy REX_OPCODE prefix for mov/add with
 	     R_X86_64_GOTTPOFF relocation so that linker can safely
-	     perform IE->LE optimization.  */
+	     perform IE->LE optimization.  A dummy REX_OPCODE prefix
+	     is also needed for lea with R_X86_64_GOTPC32_TLSDESC
+	     relocation for GDesc -> LE optimization.  */
 	  if (x86_elf_abi == X86_64_X32_ABI
 	      && i.operands == 2
-	      && i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+	      && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+		  || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
 	      && i.prefix[REX_PREFIX] == 0)
 	    add_prefix (REX_OPCODE);
 #endif
diff --git a/gas/testsuite/gas/i386/ilp32/x32-tls.d b/gas/testsuite/gas/i386/ilp32/x32-tls.d
index 1255829f8b7..ab4da5c730b 100644
--- a/gas/testsuite/gas/i386/ilp32/x32-tls.d
+++ b/gas/testsuite/gas/i386/ilp32/x32-tls.d
@@ -10,4 +10,6 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	4c 8b 25 00 00 00 00 	mov    0x0\(%rip\),%r12        # e <_start\+0xe>
 [ 	]*[a-f0-9]+:	40 03 05 00 00 00 00 	rex add 0x0\(%rip\),%eax        # 15 <_start\+0x15>
 [ 	]*[a-f0-9]+:	44 03 25 00 00 00 00 	add    0x0\(%rip\),%r12d        # 1c <_start\+0x1c>
+[ 	]*[a-f0-9]+:	40 8d 05 00 00 00 00 	rex lea 0x0\(%rip\),%eax        # 23 <_start\+0x23>
+[ 	]*[a-f0-9]+:	44 8d 25 00 00 00 00 	lea    0x0\(%rip\),%r12d        # 2a <_start\+0x2a>
 #pass
diff --git a/gas/testsuite/gas/i386/ilp32/x32-tls.s b/gas/testsuite/gas/i386/ilp32/x32-tls.s
index f9626cdfdd3..e1599be73c1 100644
--- a/gas/testsuite/gas/i386/ilp32/x32-tls.s
+++ b/gas/testsuite/gas/i386/ilp32/x32-tls.s
@@ -4,6 +4,8 @@ _start:
 	mov	foo@gottpoff(%rip), %r12
 	add	foo@gottpoff(%rip), %eax
 	add	foo@gottpoff(%rip), %r12d
+	lea	foo@tlsdesc(%rip), %eax
+	lea	foo@tlsdesc(%rip), %r12d
 	.globl	foo
 	.section	.tdata,"awT",@progbits
 	.align 4
diff --git a/ld/testsuite/ld-x86-64/pr25416-1.d b/ld/testsuite/ld-x86-64/pr25416-1.d
new file mode 100644
index 00000000000..14712e85fcd
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1.d
@@ -0,0 +1,13 @@
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	40 c7 c0 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+rex mov \$0x[a-f0-9]+,%eax
+ +[a-f0-9]+:	0f 1f 00             	nopl   \(%rax\)
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-1.s b/ld/testsuite/ld-x86-64/pr25416-1.s
new file mode 100644
index 00000000000..66636c12981
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1.s
@@ -0,0 +1,13 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-2.d b/ld/testsuite/ld-x86-64/pr25416-2.d
new file mode 100644
index 00000000000..e60c8222474
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2.d
@@ -0,0 +1,13 @@
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	48 c7 c0 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+mov    \$0x[a-f0-9]+,%rax
+ +[a-f0-9]+:	66 90                	xchg   %ax,%ax
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-2.s b/ld/testsuite/ld-x86-64/pr25416-2.s
new file mode 100644
index 00000000000..b6dbb6d93ad
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2.s
@@ -0,0 +1,13 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-3a.c b/ld/testsuite/ld-x86-64/pr25416-3a.c
new file mode 100644
index 00000000000..521c13b38a8
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3a.c
@@ -0,0 +1,27 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+__thread int bar = 301;
+
+extern int *test1 (int);
+extern int *test2 (int);
+
+int
+main ()
+{
+  int *p;
+  p = test1 (30);
+  if (*p != 30)
+    abort ();
+  *p = 40;
+  test1 (40);
+  p = test2 (301);
+  if (*p != 301)
+    abort ();
+  if (p != &bar)
+    abort ();
+  *p = 40;
+  test2 (40);
+  puts ("PASS");
+  return 0;
+}
diff --git a/ld/testsuite/ld-x86-64/pr25416-3b.s b/ld/testsuite/ld-x86-64/pr25416-3b.s
new file mode 100644
index 00000000000..95a0226aa42
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3b.s
@@ -0,0 +1,50 @@
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %eax
+	call	*bar@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-3c.s b/ld/testsuite/ld-x86-64/pr25416-3c.s
new file mode 100644
index 00000000000..94a64bad9ea
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3c.s
@@ -0,0 +1,54 @@
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %rax
+	call	*bar@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/x86-64.exp b/ld/testsuite/ld-x86-64/x86-64.exp
index 88f75e0e431..535b65f7bdf 100644
--- a/ld/testsuite/ld-x86-64/x86-64.exp
+++ b/ld/testsuite/ld-x86-64/x86-64.exp
@@ -461,6 +461,8 @@ run_dump_test "pr24721-x32"
 run_dump_test "pr24905"
 run_dump_test "pr24905-x32"
 run_dump_test "align-branch-1"
+run_dump_test "pr25416-1"
+run_dump_test "pr25416-2"
 
 if { ![istarget "x86_64-*-linux*"] && ![istarget "x86_64-*-nacl*"]} {
     return
@@ -1302,6 +1304,37 @@ if { [isnative] && [check_compiler_available] } {
     ]
 
     if  {[istarget "x86_64-*-linux*-gnux32"]} {
+	run_cc_link_tests [list \
+	    [list \
+		"Build pr25416-3b.o" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3b.s } \
+	    ] \
+	    [list \
+		"Build pr25416-3b.so" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-3b.s } \
+		{} \
+		"pr25416-3b.so" \
+	    ] \
+	    [list \
+		"Build pr25416-3c.o" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3c.s } \
+	    ] \
+	    [list \
+		"Build pr25416-3c.so" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-3b.s } \
+		{} \
+		"pr25416-3c.so" \
+	    ] \
+	]
+
 	run_ld_link_exec_tests [list \
 	    [list \
 		"Run pr22001-1b" \
@@ -1321,6 +1354,42 @@ if { [isnative] && [check_compiler_available] } {
 		"pass.out" \
 		"$NOPIE_CFLAGS" \
 	    ] \
+	    [list \
+		"Run pr25416-3a" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-3b.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3a" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-3b" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-3b.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3b" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-3c" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-3c.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3c" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-3d" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-3c.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3d" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
 	]
     } else {
 	run_cc_link_tests [list \
H.J. Lu Jan. 19, 2020, 7:52 p.m. | #2
On Sat, Jan 18, 2020 at 6:44 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> On Sat, Jan 18, 2020 at 3:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> >

> > For X32, TLSDESC sequences can be

> >

> > 8d 05 00 00 00 00       lea     x@TLSDESC(%rip), %eax

> > 67 ff 10                call    *x@TLSCALL(%eax)

> >

>

> For x32, we must encode "lea x@TLSDESC(%rip), %reg" with a REX prefix

> even if it isn't required.  Otherwise linker can’t safely perform GDesc -> LE

> optimization.  If the lea encoding has a variable length, linker can't

> tell where

> it starts.  Here is the updated patch to always generate a REX prefix.

>


Here is the updated patch, which also handles the GDesc -> IE optimization.
BTW, I posted the x86-64 psABI update at

https://gitlab.com/x86-psABIs/x86-64-ABI/merge_requests/4


-- 
H.J.
From ad02976b76f554e5b5749654cb08161e4871bcbe Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sat, 18 Jan 2020 13:18:21 -0800
Subject: [PATCH] x86-64: Fix TLSDESC relaxation for x32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For x32, we must encode "lea x@TLSDESC(%rip), %reg" with a REX prefix
even if it isn't required.  Otherwise linker can’t safely perform
GDesc -> IE/LE optimization.  X32 TLSDESC sequences can be:

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg
...
67 ff 10		call	*x@TLSCALL(%eax)

or the same sequence as LP64:

48 8d 05 00 00 00 00	lea	foo@TLSDESC(%rip), %reg
...
ff 10			call	*foo@TLSCALL(%rax)

We need to support both sequences for x32.  For both GDesc -> IE/LE
transitions,

67 ff 10		call	*x@TLSCALL(%eax)

should be relaxed to

0f 1f 00		nopl	(%rax)

For GDesc -> LE transition,

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg

should be relaxed to

40 c7 c0 fc ff ff ff	rex movl $x@tpoff, %reg

For GDesc -> IE transition,

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg

should be relaxed to

40 8b 05 00 00 00 00	rex movl x@gottpoff(%rip), %reg

bfd/

	PR ld/25416
	* elf64-x86-64.c (elf_x86_64_check_tls_transition): Support
	"rex leal x@tlsdesc(%rip), %reg" and "call *x@tlsdesc(%eax)" in
	X32 mode.
	(elf_x86_64_relocate_section): In x32 mode, for GDesc -> LE
	transition, relax "rex leal x@tlsdesc(%rip), %reg" to
	"rex movl $x@tpoff, %reg", for GDesc -> IE transition, relax
	"rex leal x@tlsdesc(%rip), %reg" to
	"rex movl x@gottpoff(%rip), %eax".  For both transitions, relax
	"call *(%eax)" to "nopl (%rax)".

gas/

	PR ld/25416
	* config/tc-i386.c (output_insn): Add a dummy REX_OPCODE prefix
	for lea with R_X86_64_GOTPC32_TLSDESC relocation when generating
	x32 object.
	* testsuite/gas/i386/ilp32/x32-tls.d: Updated.
	* testsuite/gas/i386/ilp32/x32-tls.s: Add tests for lea with
	R_X86_64_GOTPC32_TLSDESC relocation.

ld/

	PR ld/25416
	* testsuite/ld-x86-64/pr25416-1.d: New file.
	* testsuite/ld-x86-64/pr25416-1.s: Likewise.
	* testsuite/ld-x86-64/pr25416-2.d: Likewise.
	* testsuite/ld-x86-64/pr25416-2.s: Likewise.
	* testsuite/ld-x86-64/pr25416-3.d: Likewise.
	* testsuite/ld-x86-64/pr25416-3.s: Likewise.
	* testsuite/ld-x86-64/pr25416-4.d: Likewise.
	* testsuite/ld-x86-64/pr25416-4.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5a.c: Likewise.
	* testsuite/ld-x86-64/pr25416-5b.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5c.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5d.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5e.s: Likewise.
	* testsuite/ld-x86-64/x86-64.exp: Run PR ld/25416 tests.
---
 bfd/elf64-x86-64.c                     | 100 +++++++++++++++++-----
 gas/config/tc-i386.c                   |   7 +-
 gas/testsuite/gas/i386/ilp32/x32-tls.d |   2 +
 gas/testsuite/gas/i386/ilp32/x32-tls.s |   2 +
 ld/testsuite/ld-x86-64/pr25416-1.d     |  13 +++
 ld/testsuite/ld-x86-64/pr25416-1.s     |  13 +++
 ld/testsuite/ld-x86-64/pr25416-2.d     |  13 +++
 ld/testsuite/ld-x86-64/pr25416-2.s     |  13 +++
 ld/testsuite/ld-x86-64/pr25416-3.d     |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-3.s     |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-4.d     |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-4.s     |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-5a.c    |  33 ++++++++
 ld/testsuite/ld-x86-64/pr25416-5b.s    |  52 ++++++++++++
 ld/testsuite/ld-x86-64/pr25416-5c.s    |  56 +++++++++++++
 ld/testsuite/ld-x86-64/pr25416-5d.s    |  19 +++++
 ld/testsuite/ld-x86-64/pr25416-5e.s    |  23 ++++++
 ld/testsuite/ld-x86-64/x86-64.exp      | 110 +++++++++++++++++++++++++
 18 files changed, 495 insertions(+), 21 deletions(-)
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-4.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-4.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5a.c
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5b.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5c.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5d.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5e.s

diff --git a/bfd/elf64-x86-64.c b/bfd/elf64-x86-64.c
index 79e68ff4767..014bea14754 100644
--- a/bfd/elf64-x86-64.c
+++ b/bfd/elf64-x86-64.c
@@ -1223,7 +1223,8 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_GOTPC32_TLSDESC:
       /* Check transition from GDesc access model:
-		leaq x@tlsdesc(%rip), %rax
+		leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		rex leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 	 Make sure it's a leaq adding rip to a 32-bit offset
 	 into any register, although it's probably almost always
@@ -1233,7 +1234,8 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 	return FALSE;
 
       val = bfd_get_8 (abfd, contents + offset - 3);
-      if ((val & 0xfb) != 0x48)
+      val &= 0xfb;
+      if (val != 0x48 && (ABI_64_P (abfd) || val != 0x40))
 	return FALSE;
 
       if (bfd_get_8 (abfd, contents + offset - 2) != 0x8d)
@@ -1244,13 +1246,26 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_TLSDESC_CALL:
       /* Check transition from GDesc access model:
-		call *x@tlsdesc(%rax)
+		call *x@tlsdesc(%rax) <--- LP64 mode.
+		call *x@tlsdesc(%eax) <--- X32 mode.
        */
       if (offset + 2 <= sec->size)
 	{
-	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
+	  unsigned int prefix;
 	  call = contents + offset;
-	  return call[0] == 0xff && call[1] == 0x10;
+	  prefix = 0;
+	  if (!ABI_64_P (abfd))
+	    {
+	      /* Check for call *x@tlsdesc(%eax).  */
+	      if (call[0] == 0x67)
+		{
+		  prefix = 1;
+		  if (offset + 3 > sec->size)
+		    return FALSE;
+		}
+	    }
+	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
+	  return call[prefix] == 0xff && call[1 + prefix] == 0x10;
 	}
 
       return FALSE;
@@ -3401,10 +3416,13 @@ corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally something like:
-		     leaq x@tlsdesc(%rip), %rax
+		     leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		     rex leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 		     Change it to:
-		     movl $x@tpoff, %rax.  */
+		     movq $x@tpoff, %rax <--- LP64 mode.
+		     rex movl $x@tpoff, %eax <--- X32 mode.
+		   */
 
 		  unsigned int val, type;
 
@@ -3412,7 +3430,8 @@ corrupt_input:
 		    goto corrupt_input;
 		  type = bfd_get_8 (input_bfd, contents + roff - 3);
 		  val = bfd_get_8 (input_bfd, contents + roff - 1);
-		  bfd_put_8 (output_bfd, 0x48 | ((type >> 2) & 1),
+		  bfd_put_8 (output_bfd,
+			     (type & 0x48) | ((type >> 2) & 1),
 			     contents + roff - 3);
 		  bfd_put_8 (output_bfd, 0xc7, contents + roff - 2);
 		  bfd_put_8 (output_bfd, 0xc0 | ((val >> 3) & 7),
@@ -3426,11 +3445,30 @@ corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally:
-		     call *(%rax)
+		     call *(%rax) <--- LP64 mode.
+		     call *(%eax) <--- X32 mode.
 		     Turn it into:
-		     xchg %ax,%ax.  */
-		  bfd_put_8 (output_bfd, 0x66, contents + roff);
-		  bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		     xchg %ax,%ax <-- LP64 mode.
+		     nopl (%rax)  <-- X32 mode.
+		   */
+		  unsigned int prefix = 0;
+		  if (!ABI_64_P (input_bfd))
+		    {
+		      /* Check for call *x@tlsdesc(%eax).  */
+		      if (contents[roff] == 0x67)
+			prefix = 1;
+		    }
+		  if (prefix)
+		    {
+		      bfd_put_8 (output_bfd, 0x0f, contents + roff);
+		      bfd_put_8 (output_bfd, 0x1f, contents + roff + 1);
+		      bfd_put_8 (output_bfd, 0x00, contents + roff + 2);
+		    }
+		  else
+		    {
+		      bfd_put_8 (output_bfd, 0x66, contents + roff);
+		      bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		    }
 		  continue;
 		}
 	      else if (r_type == R_X86_64_GOTTPOFF)
@@ -3741,13 +3779,18 @@ corrupt_input:
 		{
 		  /* GDesc -> IE transition.
 		     It's originally something like:
-		     leaq x@tlsdesc(%rip), %rax
+		     leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		     rex leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 		     Change it to:
-		     movq x@gottpoff(%rip), %rax # before xchg %ax,%ax.  */
+		     # before xchg %ax,%ax in LP64 mode.
+		     movq x@gottpoff(%rip), %rax
+		     # before nopl (%rax) in X32 mode.
+		     rex movl x@gottpoff(%rip), %eax
+		  */
 
 		  /* Now modify the instruction as appropriate. To
-		     turn a leaq into a movq in the form we use it, it
+		     turn a lea into a mov in the form we use it, it
 		     suffices to change the second byte from 0x8d to
 		     0x8b.  */
 		  if (roff < 2)
@@ -3768,13 +3811,32 @@ corrupt_input:
 		{
 		  /* GDesc -> IE transition.
 		     It's originally:
-		     call *(%rax)
+		     call *(%rax) <--- LP64 mode.
+		     call *(%eax) <--- X32 mode.
 
 		     Change it to:
-		     xchg %ax, %ax.  */
+		     xchg %ax, %ax <-- LP64 mode.
+		     nopl (%rax)  <-- X32 mode.
+		   */
 
-		  bfd_put_8 (output_bfd, 0x66, contents + roff);
-		  bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		  unsigned int prefix = 0;
+		  if (!ABI_64_P (input_bfd))
+		    {
+		      /* Check for call *x@tlsdesc(%eax).  */
+		      if (contents[roff] == 0x67)
+			prefix = 1;
+		    }
+		  if (prefix)
+		    {
+		      bfd_put_8 (output_bfd, 0x0f, contents + roff);
+		      bfd_put_8 (output_bfd, 0x1f, contents + roff + 1);
+		      bfd_put_8 (output_bfd, 0x00, contents + roff + 2);
+		    }
+		  else
+		    {
+		      bfd_put_8 (output_bfd, 0x66, contents + roff);
+		      bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		    }
 		  continue;
 		}
 	      else
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 8728725b82d..17c1e988260 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8713,10 +8713,13 @@ output_insn (void)
 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
 	  /* For x32, add a dummy REX_OPCODE prefix for mov/add with
 	     R_X86_64_GOTTPOFF relocation so that linker can safely
-	     perform IE->LE optimization.  */
+	     perform IE->LE optimization.  A dummy REX_OPCODE prefix
+	     is also needed for lea with R_X86_64_GOTPC32_TLSDESC
+	     relocation for GDesc -> LE optimization.  */
 	  if (x86_elf_abi == X86_64_X32_ABI
 	      && i.operands == 2
-	      && i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+	      && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+		  || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
 	      && i.prefix[REX_PREFIX] == 0)
 	    add_prefix (REX_OPCODE);
 #endif
diff --git a/gas/testsuite/gas/i386/ilp32/x32-tls.d b/gas/testsuite/gas/i386/ilp32/x32-tls.d
index 1255829f8b7..ab4da5c730b 100644
--- a/gas/testsuite/gas/i386/ilp32/x32-tls.d
+++ b/gas/testsuite/gas/i386/ilp32/x32-tls.d
@@ -10,4 +10,6 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	4c 8b 25 00 00 00 00 	mov    0x0\(%rip\),%r12        # e <_start\+0xe>
 [ 	]*[a-f0-9]+:	40 03 05 00 00 00 00 	rex add 0x0\(%rip\),%eax        # 15 <_start\+0x15>
 [ 	]*[a-f0-9]+:	44 03 25 00 00 00 00 	add    0x0\(%rip\),%r12d        # 1c <_start\+0x1c>
+[ 	]*[a-f0-9]+:	40 8d 05 00 00 00 00 	rex lea 0x0\(%rip\),%eax        # 23 <_start\+0x23>
+[ 	]*[a-f0-9]+:	44 8d 25 00 00 00 00 	lea    0x0\(%rip\),%r12d        # 2a <_start\+0x2a>
 #pass
diff --git a/gas/testsuite/gas/i386/ilp32/x32-tls.s b/gas/testsuite/gas/i386/ilp32/x32-tls.s
index f9626cdfdd3..e1599be73c1 100644
--- a/gas/testsuite/gas/i386/ilp32/x32-tls.s
+++ b/gas/testsuite/gas/i386/ilp32/x32-tls.s
@@ -4,6 +4,8 @@ _start:
 	mov	foo@gottpoff(%rip), %r12
 	add	foo@gottpoff(%rip), %eax
 	add	foo@gottpoff(%rip), %r12d
+	lea	foo@tlsdesc(%rip), %eax
+	lea	foo@tlsdesc(%rip), %r12d
 	.globl	foo
 	.section	.tdata,"awT",@progbits
 	.align 4
diff --git a/ld/testsuite/ld-x86-64/pr25416-1.d b/ld/testsuite/ld-x86-64/pr25416-1.d
new file mode 100644
index 00000000000..14712e85fcd
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1.d
@@ -0,0 +1,13 @@
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	40 c7 c0 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+rex mov \$0x[a-f0-9]+,%eax
+ +[a-f0-9]+:	0f 1f 00             	nopl   \(%rax\)
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-1.s b/ld/testsuite/ld-x86-64/pr25416-1.s
new file mode 100644
index 00000000000..66636c12981
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1.s
@@ -0,0 +1,13 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-2.d b/ld/testsuite/ld-x86-64/pr25416-2.d
new file mode 100644
index 00000000000..e60c8222474
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2.d
@@ -0,0 +1,13 @@
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	48 c7 c0 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+mov    \$0x[a-f0-9]+,%rax
+ +[a-f0-9]+:	66 90                	xchg   %ax,%ax
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-2.s b/ld/testsuite/ld-x86-64/pr25416-2.s
new file mode 100644
index 00000000000..b6dbb6d93ad
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2.s
@@ -0,0 +1,13 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-3.d b/ld/testsuite/ld-x86-64/pr25416-3.d
new file mode 100644
index 00000000000..fbf330f5796
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3.d
@@ -0,0 +1,15 @@
+#as: --x32
+#ld: -melf32_x86_64 -shared
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	40 8b 05 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+rex mov 0x[a-f0-9]+\(%rip\),%eax[ \t]+# [a-f0-9]+ <.got>
+ +[a-f0-9]+:	0f 1f 00             	nopl   \(%rax\)
+ +[a-f0-9]+:	64 8b 0c 25 00 00 00 00 	mov    %fs:0x0,%ecx
+ +[a-f0-9]+:	40 03 0d [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+rex add 0x[a-f0-9]+\(%rip\),%ecx[ \t]+# [a-f0-9]+ <.got>
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-3.s b/ld/testsuite/ld-x86-64/pr25416-3.s
new file mode 100644
index 00000000000..949b1239ed2
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3.s
@@ -0,0 +1,15 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	movl	%fs:0, %ecx
+	addl	foo@gottpoff(%rip), %ecx
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-4.d b/ld/testsuite/ld-x86-64/pr25416-4.d
new file mode 100644
index 00000000000..711ec0ea7c0
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-4.d
@@ -0,0 +1,15 @@
+#as: --x32
+#ld: -melf32_x86_64 -shared
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	48 8b 05 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+mov    0x[a-f0-9]+\(%rip\),%rax[ \t]+# [a-f0-9]+ <.got>
+ +[a-f0-9]+:	66 90                	xchg   %ax,%ax
+ +[a-f0-9]+:	64 8b 0c 25 00 00 00 00 	mov    %fs:0x0,%ecx
+ +[a-f0-9]+:	40 03 0d [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+rex add 0x[a-f0-9]+\(%rip\),%ecx[ \t]+# [a-f0-9]+ <.got>
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-4.s b/ld/testsuite/ld-x86-64/pr25416-4.s
new file mode 100644
index 00000000000..5120af466c7
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-4.s
@@ -0,0 +1,15 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	movl	%fs:0, %ecx
+	addl	foo@gottpoff(%rip), %ecx
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5a.c b/ld/testsuite/ld-x86-64/pr25416-5a.c
new file mode 100644
index 00000000000..9d820a1911d
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5a.c
@@ -0,0 +1,33 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+__thread int bar = 301;
+
+extern int *test1 (int);
+extern int *test2 (int);
+extern int *test3 (int);
+
+int
+main ()
+{
+  int *p;
+  p = test1 (30);
+  if (*p != 30)
+    abort ();
+  *p = 40;
+  test1 (40);
+  p = test2 (301);
+  if (*p != 301)
+    abort ();
+  if (p != &bar)
+    abort ();
+  *p = 40;
+  test2 (40);
+  p = test3 (40);
+  if (*p != 40)
+    abort ();
+  *p = 50;
+  test3 (50);
+  puts ("PASS");
+  return 0;
+}
diff --git a/ld/testsuite/ld-x86-64/pr25416-5b.s b/ld/testsuite/ld-x86-64/pr25416-5b.s
new file mode 100644
index 00000000000..8edd248c4fe
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5b.s
@@ -0,0 +1,52 @@
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %eax
+	call	*bar@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.hidden foo
+	.globl foo
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5c.s b/ld/testsuite/ld-x86-64/pr25416-5c.s
new file mode 100644
index 00000000000..f42a8a0e6b3
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5c.s
@@ -0,0 +1,56 @@
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %rax
+	call	*bar@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.hidden foo
+	.globl foo
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5d.s b/ld/testsuite/ld-x86-64/pr25416-5d.s
new file mode 100644
index 00000000000..8d36733d9bf
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5d.s
@@ -0,0 +1,19 @@
+	.text
+	.p2align 4
+	.globl	test3
+	.type	test3, @function
+test3:
+	.cfi_startproc
+	movl	%fs:0, %eax
+	addq	foo@gottpoff(%rip), %rax
+	cmpl	%edi, (%eax)
+	jne	.L7
+	movl	%eax, %eax
+	ret
+.L7:
+	pushq	%rax
+	.cfi_def_cfa_offset 16
+	call	abort@PLT
+	.cfi_endproc
+	.size	test3, .-test3
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5e.s b/ld/testsuite/ld-x86-64/pr25416-5e.s
new file mode 100644
index 00000000000..fe0c0e8b111
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5e.s
@@ -0,0 +1,23 @@
+	.text
+	.p2align 4
+	.globl	test3
+	.type	test3, @function
+test3:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test3, .-test3
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/x86-64.exp b/ld/testsuite/ld-x86-64/x86-64.exp
index 88f75e0e431..f5542066fb9 100644
--- a/ld/testsuite/ld-x86-64/x86-64.exp
+++ b/ld/testsuite/ld-x86-64/x86-64.exp
@@ -461,6 +461,10 @@ run_dump_test "pr24721-x32"
 run_dump_test "pr24905"
 run_dump_test "pr24905-x32"
 run_dump_test "align-branch-1"
+run_dump_test "pr25416-1"
+run_dump_test "pr25416-2"
+run_dump_test "pr25416-3"
+run_dump_test "pr25416-4"
 
 if { ![istarget "x86_64-*-linux*"] && ![istarget "x86_64-*-nacl*"]} {
     return
@@ -1302,6 +1306,51 @@ if { [isnative] && [check_compiler_available] } {
     ]
 
     if  {[istarget "x86_64-*-linux*-gnux32"]} {
+	run_cc_link_tests [list \
+	    [list \
+		"Build pr25416-5b.o (GDesc -maddress-mode=short)" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5b.s } \
+	    ] \
+	    [list \
+		"Build pr25416-5b.so (GDesc and IE -maddress-mode=short)" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-5b.s pr25416-5d.s } \
+		{} \
+		"pr25416-5b.so" \
+	    ] \
+	    [list \
+		"Build pr25416-5c.o (GDesc -maddress-mode=long)" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5c.s } \
+	    ] \
+	    [list \
+		"Build pr25416-5c.so (GDesc and IE -maddress-mode=long)" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-5c.s pr25416-5d.s } \
+		{} \
+		"pr25416-5c.so" \
+	    ] \
+	    [list \
+		"Build pr25416-5d.so (GDesc -maddress-mode=short)" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-5b.s pr25416-5e.s } \
+		{} \
+		"pr25416-5d.so" \
+	    ] \
+	    [list \
+		"Build pr25416-5d.o (IE -maddress-mode=short)" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5d.s } \
+	    ] \
+	]
+
 	run_ld_link_exec_tests [list \
 	    [list \
 		"Run pr22001-1b" \
@@ -1321,6 +1370,67 @@ if { [isnative] && [check_compiler_available] } {
 		"pass.out" \
 		"$NOPIE_CFLAGS" \
 	    ] \
+	    [list \
+		"Run pr25416-5a (GDesc and IE -maddress-mode=short)" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-5b.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5a" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5b (GDesc and LE -maddress-mode=short" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-5b.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5b" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5c (GDesc and IE -maddress-mode=long)" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-5c.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5c" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5d (GDesc and LE -maddress-mode=long)" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-5c.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5d" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5e (GDesc -maddress-mode=short)" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-5d.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5e" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5f (PIE GDesc and LE -maddress-mode=short)" \
+		"-pie tmpdir/pr25416-5b.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5f" \
+		"pass.out" \
+	    ] \
+	    [list \
+		"Run pr25416-5g (PIE GDesc and LE -maddress-mode=long)" \
+		"-pie tmpdir/pr25416-5c.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5g" \
+		"pass.out" \
+	    ] \
 	]
     } else {
 	run_cc_link_tests [list \
H.J. Lu Jan. 20, 2020, 2:55 p.m. | #3
On Sun, Jan 19, 2020 at 11:52 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>

> On Sat, Jan 18, 2020 at 6:44 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> >

> > On Sat, Jan 18, 2020 at 3:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> > >

> > > For X32, TLSDESC sequences can be

> > >

> > > 8d 05 00 00 00 00       lea     x@TLSDESC(%rip), %eax

> > > 67 ff 10                call    *x@TLSCALL(%eax)

> > >

> >

> > For x32, we must encode "lea x@TLSDESC(%rip), %reg" with a REX prefix
> > even if it isn't required.  Otherwise the linker can't safely perform the
> > GDesc -> LE optimization.  If the lea encoding has a variable length, the
> > linker can't tell where it starts.  Here is the updated patch to always
> > generate a REX prefix.

> >

>

> Here is the updated patch to handle  GDesc -> IE optimization.

> BTW, I posted x86-64 psABI update at

>

> https://gitlab.com/x86-psABIs/x86-64-ABI/merge_requests/4

>


This is the patch I am checking in.


-- 
H.J.
From 43a991bf3a52a4666ebb2c2f2af41974640b7c06 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sat, 18 Jan 2020 13:18:21 -0800
Subject: [PATCH] x86-64: Fix TLSDESC relaxation for x32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For x32, we must encode "lea x@TLSDESC(%rip), %reg" with a REX prefix
even if it isn't required.  Otherwise the linker can't safely perform the
GDesc -> IE/LE optimization.  X32 TLSDESC sequences can be:

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg
...
67 ff 10		call	*x@TLSCALL(%eax)

or the same sequence as LP64:

48 8d 05 00 00 00 00	lea	foo@TLSDESC(%rip), %reg
...
ff 10			call	*foo@TLSCALL(%rax)

We need to support both sequences for x32.  For both GDesc -> IE/LE
transitions,

67 ff 10		call	*x@TLSCALL(%eax)

should be relaxed to

0f 1f 00		nopl	(%rax)

For GDesc -> LE transition,

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg

should be relaxed to

40 c7 c0 fc ff ff ff	rex movl $x@tpoff, %reg

For GDesc -> IE transition,

40 8d 05 00 00 00 00	rex lea	x@TLSDESC(%rip), %reg

should be relaxed to

40 8b 05 00 00 00 00	rex movl x@gottpoff(%rip), %reg

bfd/

	PR ld/25416
	* elf64-x86-64.c (elf_x86_64_check_tls_transition): Support
	"rex leal x@tlsdesc(%rip), %reg" and "call *x@tlsdesc(%eax)" in
	X32 mode.
	(elf_x86_64_relocate_section): In x32 mode, for GDesc -> LE
	transition, relax "rex leal x@tlsdesc(%rip), %reg" to
	"rex movl $x@tpoff, %reg", for GDesc -> IE transition, relax
	"rex leal x@tlsdesc(%rip), %reg" to
	"rex movl x@gottpoff(%rip), %eax".  For both transitions, relax
	"call *(%eax)" to "nopl (%rax)".

gas/

	PR ld/25416
	* config/tc-i386.c (output_insn): Add a dummy REX_OPCODE prefix
	for lea with R_X86_64_GOTPC32_TLSDESC relocation when generating
	x32 object.
	* testsuite/gas/i386/ilp32/x32-tls.d: Updated.
	* testsuite/gas/i386/ilp32/x32-tls.s: Add tests for lea with
	R_X86_64_GOTPC32_TLSDESC relocation.

ld/

	PR ld/25416
	* testsuite/ld-x86-64/pr25416-1.s: New file.
	* testsuite/ld-x86-64/pr25416-1a.d: Likewise.
	* testsuite/ld-x86-64/pr25416-1b.d: Likewise.
	* testsuite/ld-x86-64/pr25416-2.s: Likewise.
	* testsuite/ld-x86-64/pr25416-2a.d: Likewise.
	* testsuite/ld-x86-64/pr25416-2b.d: Likewise.
	* testsuite/ld-x86-64/pr25416-3.d: Likewise.
	* testsuite/ld-x86-64/pr25416-3.s: Likewise.
	* testsuite/ld-x86-64/pr25416-4.d: Likewise.
	* testsuite/ld-x86-64/pr25416-4.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5a.c: Likewise.
	* testsuite/ld-x86-64/pr25416-5b.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5c.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5d.s: Likewise.
	* testsuite/ld-x86-64/pr25416-5e.s: Likewise.
	* testsuite/ld-x86-64/x86-64.exp: Run PR ld/25416 tests.
---
 bfd/elf64-x86-64.c                     | 100 +++++++++++++++++-----
 gas/config/tc-i386.c                   |   7 +-
 gas/testsuite/gas/i386/ilp32/x32-tls.d |   2 +
 gas/testsuite/gas/i386/ilp32/x32-tls.s |   2 +
 ld/testsuite/ld-x86-64/pr25416-1.s     |  13 +++
 ld/testsuite/ld-x86-64/pr25416-1a.d    |  16 ++++
 ld/testsuite/ld-x86-64/pr25416-1b.d    |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-2.s     |  13 +++
 ld/testsuite/ld-x86-64/pr25416-2a.d    |  16 ++++
 ld/testsuite/ld-x86-64/pr25416-2b.d    |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-3.d     |  16 ++++
 ld/testsuite/ld-x86-64/pr25416-3.s     |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-4.d     |  16 ++++
 ld/testsuite/ld-x86-64/pr25416-4.s     |  15 ++++
 ld/testsuite/ld-x86-64/pr25416-5a.c    |  33 ++++++++
 ld/testsuite/ld-x86-64/pr25416-5b.s    |  52 ++++++++++++
 ld/testsuite/ld-x86-64/pr25416-5c.s    |  56 +++++++++++++
 ld/testsuite/ld-x86-64/pr25416-5d.s    |  19 +++++
 ld/testsuite/ld-x86-64/pr25416-5e.s    |  23 +++++
 ld/testsuite/ld-x86-64/x86-64.exp      | 112 +++++++++++++++++++++++++
 20 files changed, 535 insertions(+), 21 deletions(-)
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1a.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-1b.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2a.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-2b.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-3.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-4.d
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-4.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5a.c
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5b.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5c.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5d.s
 create mode 100644 ld/testsuite/ld-x86-64/pr25416-5e.s

diff --git a/bfd/elf64-x86-64.c b/bfd/elf64-x86-64.c
index 79e68ff4767..014bea14754 100644
--- a/bfd/elf64-x86-64.c
+++ b/bfd/elf64-x86-64.c
@@ -1223,7 +1223,8 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_GOTPC32_TLSDESC:
       /* Check transition from GDesc access model:
-		leaq x@tlsdesc(%rip), %rax
+		leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		rex leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 	 Make sure it's a leaq adding rip to a 32-bit offset
 	 into any register, although it's probably almost always
@@ -1233,7 +1234,8 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 	return FALSE;
 
       val = bfd_get_8 (abfd, contents + offset - 3);
-      if ((val & 0xfb) != 0x48)
+      val &= 0xfb;
+      if (val != 0x48 && (ABI_64_P (abfd) || val != 0x40))
 	return FALSE;
 
       if (bfd_get_8 (abfd, contents + offset - 2) != 0x8d)
@@ -1244,13 +1246,26 @@ elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_TLSDESC_CALL:
       /* Check transition from GDesc access model:
-		call *x@tlsdesc(%rax)
+		call *x@tlsdesc(%rax) <--- LP64 mode.
+		call *x@tlsdesc(%eax) <--- X32 mode.
        */
       if (offset + 2 <= sec->size)
 	{
-	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
+	  unsigned int prefix;
 	  call = contents + offset;
-	  return call[0] == 0xff && call[1] == 0x10;
+	  prefix = 0;
+	  if (!ABI_64_P (abfd))
+	    {
+	      /* Check for call *x@tlsdesc(%eax).  */
+	      if (call[0] == 0x67)
+		{
+		  prefix = 1;
+		  if (offset + 3 > sec->size)
+		    return FALSE;
+		}
+	    }
+	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
+	  return call[prefix] == 0xff && call[1 + prefix] == 0x10;
 	}
 
       return FALSE;
@@ -3401,10 +3416,13 @@ corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally something like:
-		     leaq x@tlsdesc(%rip), %rax
+		     leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		     rex leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 		     Change it to:
-		     movl $x@tpoff, %rax.  */
+		     movq $x@tpoff, %rax <--- LP64 mode.
+		     rex movl $x@tpoff, %eax <--- X32 mode.
+		   */
 
 		  unsigned int val, type;
 
@@ -3412,7 +3430,8 @@ corrupt_input:
 		    goto corrupt_input;
 		  type = bfd_get_8 (input_bfd, contents + roff - 3);
 		  val = bfd_get_8 (input_bfd, contents + roff - 1);
-		  bfd_put_8 (output_bfd, 0x48 | ((type >> 2) & 1),
+		  bfd_put_8 (output_bfd,
+			     (type & 0x48) | ((type >> 2) & 1),
 			     contents + roff - 3);
 		  bfd_put_8 (output_bfd, 0xc7, contents + roff - 2);
 		  bfd_put_8 (output_bfd, 0xc0 | ((val >> 3) & 7),
@@ -3426,11 +3445,30 @@ corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally:
-		     call *(%rax)
+		     call *(%rax) <--- LP64 mode.
+		     call *(%eax) <--- X32 mode.
 		     Turn it into:
-		     xchg %ax,%ax.  */
-		  bfd_put_8 (output_bfd, 0x66, contents + roff);
-		  bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		     xchg %ax,%ax <-- LP64 mode.
+		     nopl (%rax)  <-- X32 mode.
+		   */
+		  unsigned int prefix = 0;
+		  if (!ABI_64_P (input_bfd))
+		    {
+		      /* Check for call *x@tlsdesc(%eax).  */
+		      if (contents[roff] == 0x67)
+			prefix = 1;
+		    }
+		  if (prefix)
+		    {
+		      bfd_put_8 (output_bfd, 0x0f, contents + roff);
+		      bfd_put_8 (output_bfd, 0x1f, contents + roff + 1);
+		      bfd_put_8 (output_bfd, 0x00, contents + roff + 2);
+		    }
+		  else
+		    {
+		      bfd_put_8 (output_bfd, 0x66, contents + roff);
+		      bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		    }
 		  continue;
 		}
 	      else if (r_type == R_X86_64_GOTTPOFF)
@@ -3741,13 +3779,18 @@ corrupt_input:
 		{
 		  /* GDesc -> IE transition.
 		     It's originally something like:
-		     leaq x@tlsdesc(%rip), %rax
+		     leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		     rex leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 		     Change it to:
-		     movq x@gottpoff(%rip), %rax # before xchg %ax,%ax.  */
+		     # before xchg %ax,%ax in LP64 mode.
+		     movq x@gottpoff(%rip), %rax
+		     # before nopl (%rax) in X32 mode.
+		     rex movl x@gottpoff(%rip), %eax
+		  */
 
 		  /* Now modify the instruction as appropriate. To
-		     turn a leaq into a movq in the form we use it, it
+		     turn a lea into a mov in the form we use it, it
 		     suffices to change the second byte from 0x8d to
 		     0x8b.  */
 		  if (roff < 2)
@@ -3768,13 +3811,32 @@ corrupt_input:
 		{
 		  /* GDesc -> IE transition.
 		     It's originally:
-		     call *(%rax)
+		     call *(%rax) <--- LP64 mode.
+		     call *(%eax) <--- X32 mode.
 
 		     Change it to:
-		     xchg %ax, %ax.  */
+		     xchg %ax, %ax <-- LP64 mode.
+		     nopl (%rax)  <-- X32 mode.
+		   */
 
-		  bfd_put_8 (output_bfd, 0x66, contents + roff);
-		  bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		  unsigned int prefix = 0;
+		  if (!ABI_64_P (input_bfd))
+		    {
+		      /* Check for call *x@tlsdesc(%eax).  */
+		      if (contents[roff] == 0x67)
+			prefix = 1;
+		    }
+		  if (prefix)
+		    {
+		      bfd_put_8 (output_bfd, 0x0f, contents + roff);
+		      bfd_put_8 (output_bfd, 0x1f, contents + roff + 1);
+		      bfd_put_8 (output_bfd, 0x00, contents + roff + 2);
+		    }
+		  else
+		    {
+		      bfd_put_8 (output_bfd, 0x66, contents + roff);
+		      bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		    }
 		  continue;
 		}
 	      else
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 8728725b82d..87ab43bbce0 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8713,10 +8713,13 @@ output_insn (void)
 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
 	  /* For x32, add a dummy REX_OPCODE prefix for mov/add with
 	     R_X86_64_GOTTPOFF relocation so that linker can safely
-	     perform IE->LE optimization.  */
+	     perform IE->LE optimization.  A dummy REX_OPCODE prefix
+	     is also needed for lea with R_X86_64_GOTPC32_TLSDESC
+	     relocation for GDesc -> IE/LE optimization.  */
 	  if (x86_elf_abi == X86_64_X32_ABI
 	      && i.operands == 2
-	      && i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+	      && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+		  || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
 	      && i.prefix[REX_PREFIX] == 0)
 	    add_prefix (REX_OPCODE);
 #endif
diff --git a/gas/testsuite/gas/i386/ilp32/x32-tls.d b/gas/testsuite/gas/i386/ilp32/x32-tls.d
index 1255829f8b7..ab4da5c730b 100644
--- a/gas/testsuite/gas/i386/ilp32/x32-tls.d
+++ b/gas/testsuite/gas/i386/ilp32/x32-tls.d
@@ -10,4 +10,6 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	4c 8b 25 00 00 00 00 	mov    0x0\(%rip\),%r12        # e <_start\+0xe>
 [ 	]*[a-f0-9]+:	40 03 05 00 00 00 00 	rex add 0x0\(%rip\),%eax        # 15 <_start\+0x15>
 [ 	]*[a-f0-9]+:	44 03 25 00 00 00 00 	add    0x0\(%rip\),%r12d        # 1c <_start\+0x1c>
+[ 	]*[a-f0-9]+:	40 8d 05 00 00 00 00 	rex lea 0x0\(%rip\),%eax        # 23 <_start\+0x23>
+[ 	]*[a-f0-9]+:	44 8d 25 00 00 00 00 	lea    0x0\(%rip\),%r12d        # 2a <_start\+0x2a>
 #pass
diff --git a/gas/testsuite/gas/i386/ilp32/x32-tls.s b/gas/testsuite/gas/i386/ilp32/x32-tls.s
index f9626cdfdd3..e1599be73c1 100644
--- a/gas/testsuite/gas/i386/ilp32/x32-tls.s
+++ b/gas/testsuite/gas/i386/ilp32/x32-tls.s
@@ -4,6 +4,8 @@ _start:
 	mov	foo@gottpoff(%rip), %r12
 	add	foo@gottpoff(%rip), %eax
 	add	foo@gottpoff(%rip), %r12d
+	lea	foo@tlsdesc(%rip), %eax
+	lea	foo@tlsdesc(%rip), %r12d
 	.globl	foo
 	.section	.tdata,"awT",@progbits
 	.align 4
diff --git a/ld/testsuite/ld-x86-64/pr25416-1.s b/ld/testsuite/ld-x86-64/pr25416-1.s
new file mode 100644
index 00000000000..66636c12981
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1.s
@@ -0,0 +1,13 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-1a.d b/ld/testsuite/ld-x86-64/pr25416-1a.d
new file mode 100644
index 00000000000..02818b0526e
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1a.d
@@ -0,0 +1,16 @@
+#name: X32 GDesc 1
+#source: pr25416-1.s
+#as: --x32
+#ld: -melf32_x86_64 -shared
+#objdump: -dw
+
+.*: +file format .*
+
+
+#...
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	40 8d 05 ([0-9a-f]{2} ){4}[ \t]+rex lea 0x[a-f0-9]+\(%rip\),%eax[ \t]+# [a-f0-9]+ <_GLOBAL_OFFSET_TABLE_\+0x[a-f0-9]+>
+ +[a-f0-9]+:	67 ff 10             	callq  \*\(%eax\)
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-1b.d b/ld/testsuite/ld-x86-64/pr25416-1b.d
new file mode 100644
index 00000000000..d1bb6892cbe
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1b.d
@@ -0,0 +1,15 @@
+#name: X32 GDesc -> LE 1
+#source: pr25416-1.s
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	40 c7 c0 ([0-9a-f]{2} ){4}[ \t]+rex mov \$0x[a-f0-9]+,%eax
+ +[a-f0-9]+:	0f 1f 00             	nopl   \(%rax\)
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-2.s b/ld/testsuite/ld-x86-64/pr25416-2.s
new file mode 100644
index 00000000000..b6dbb6d93ad
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2.s
@@ -0,0 +1,13 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-2a.d b/ld/testsuite/ld-x86-64/pr25416-2a.d
new file mode 100644
index 00000000000..d1b257652a6
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2a.d
@@ -0,0 +1,16 @@
+#name: X32 GDesc 2
+#source: pr25416-2.s
+#as: --x32
+#ld: -melf32_x86_64 -shared
+#objdump: -dw
+
+.*: +file format .*
+
+
+#...
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	48 8d 05 ([0-9a-f]{2} ){4}[ \t]+lea    0x[a-f0-9]+\(%rip\),%rax[ \t]+# [a-f0-9]+ <_GLOBAL_OFFSET_TABLE_\+0x[a-f0-9]+>
+ +[a-f0-9]+:	ff 10                	callq  \*\(%rax\)
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-2b.d b/ld/testsuite/ld-x86-64/pr25416-2b.d
new file mode 100644
index 00000000000..e78283fd666
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2b.d
@@ -0,0 +1,15 @@
+#name: X32 GDesc -> LE 2
+#source: pr25416-2.s
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	48 c7 c0 ([0-9a-f]{2} ){4}[ \t]+mov    \$0x[a-f0-9]+,%rax
+ +[a-f0-9]+:	66 90                	xchg   %ax,%ax
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-3.d b/ld/testsuite/ld-x86-64/pr25416-3.d
new file mode 100644
index 00000000000..9c1da134847
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3.d
@@ -0,0 +1,16 @@
+#name: X32 GDesc -> IE 1
+#as: --x32
+#ld: -melf32_x86_64 -shared
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	40 8b 05 ([0-9a-f]{2} ){4}[ \t]+rex mov 0x[a-f0-9]+\(%rip\),%eax[ \t]+# [a-f0-9]+ <.got>
+ +[a-f0-9]+:	0f 1f 00             	nopl   \(%rax\)
+ +[a-f0-9]+:	64 8b 0c 25 00 00 00 00 	mov    %fs:0x0,%ecx
+ +[a-f0-9]+:	40 03 0d ([0-9a-f]{2} ){4}[ \t]+rex add 0x[a-f0-9]+\(%rip\),%ecx[ \t]+# [a-f0-9]+ <.got>
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-3.s b/ld/testsuite/ld-x86-64/pr25416-3.s
new file mode 100644
index 00000000000..949b1239ed2
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3.s
@@ -0,0 +1,15 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	movl	%fs:0, %ecx
+	addl	foo@gottpoff(%rip), %ecx
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-4.d b/ld/testsuite/ld-x86-64/pr25416-4.d
new file mode 100644
index 00000000000..8d91fbc0a66
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-4.d
@@ -0,0 +1,16 @@
+#name: X32 GDesc -> IE 2
+#as: --x32
+#ld: -melf32_x86_64 -shared
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	48 8b 05 ([0-9a-f]{2} ){4}[ \t]+mov    0x[a-f0-9]+\(%rip\),%rax[ \t]+# [a-f0-9]+ <.got>
+ +[a-f0-9]+:	66 90                	xchg   %ax,%ax
+ +[a-f0-9]+:	64 8b 0c 25 00 00 00 00 	mov    %fs:0x0,%ecx
+ +[a-f0-9]+:	40 03 0d ([0-9a-f]{2} ){4}[ \t]+rex add 0x[a-f0-9]+\(%rip\),%ecx[ \t]+# [a-f0-9]+ <.got>
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-4.s b/ld/testsuite/ld-x86-64/pr25416-4.s
new file mode 100644
index 00000000000..5120af466c7
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-4.s
@@ -0,0 +1,15 @@
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	movl	%fs:0, %ecx
+	addl	foo@gottpoff(%rip), %ecx
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5a.c b/ld/testsuite/ld-x86-64/pr25416-5a.c
new file mode 100644
index 00000000000..9d820a1911d
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5a.c
@@ -0,0 +1,33 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+__thread int bar = 301;
+
+extern int *test1 (int);
+extern int *test2 (int);
+extern int *test3 (int);
+
+int
+main ()
+{
+  int *p;
+  p = test1 (30);
+  if (*p != 30)
+    abort ();
+  *p = 40;
+  test1 (40);
+  p = test2 (301);
+  if (*p != 301)
+    abort ();
+  if (p != &bar)
+    abort ();
+  *p = 40;
+  test2 (40);
+  p = test3 (40);
+  if (*p != 40)
+    abort ();
+  *p = 50;
+  test3 (50);
+  puts ("PASS");
+  return 0;
+}
diff --git a/ld/testsuite/ld-x86-64/pr25416-5b.s b/ld/testsuite/ld-x86-64/pr25416-5b.s
new file mode 100644
index 00000000000..8edd248c4fe
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5b.s
@@ -0,0 +1,52 @@
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %eax
+	call	*bar@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.hidden foo
+	.globl foo
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5c.s b/ld/testsuite/ld-x86-64/pr25416-5c.s
new file mode 100644
index 00000000000..f42a8a0e6b3
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5c.s
@@ -0,0 +1,56 @@
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %rax
+	call	*bar@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.hidden foo
+	.globl foo
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5d.s b/ld/testsuite/ld-x86-64/pr25416-5d.s
new file mode 100644
index 00000000000..8d36733d9bf
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5d.s
@@ -0,0 +1,19 @@
+	.text
+	.p2align 4
+	.globl	test3
+	.type	test3, @function
+test3:
+	.cfi_startproc
+	movl	%fs:0, %eax
+	addq	foo@gottpoff(%rip), %rax
+	cmpl	%edi, (%eax)
+	jne	.L7
+	movl	%eax, %eax
+	ret
+.L7:
+	pushq	%rax
+	.cfi_def_cfa_offset 16
+	call	abort@PLT
+	.cfi_endproc
+	.size	test3, .-test3
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-5e.s b/ld/testsuite/ld-x86-64/pr25416-5e.s
new file mode 100644
index 00000000000..fe0c0e8b111
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-5e.s
@@ -0,0 +1,23 @@
+	.text
+	.p2align 4
+	.globl	test3
+	.type	test3, @function
+test3:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test3, .-test3
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/x86-64.exp b/ld/testsuite/ld-x86-64/x86-64.exp
index 88f75e0e431..c78b0fd7576 100644
--- a/ld/testsuite/ld-x86-64/x86-64.exp
+++ b/ld/testsuite/ld-x86-64/x86-64.exp
@@ -461,6 +461,12 @@ run_dump_test "pr24721-x32"
 run_dump_test "pr24905"
 run_dump_test "pr24905-x32"
 run_dump_test "align-branch-1"
+run_dump_test "pr25416-1a"
+run_dump_test "pr25416-1b"
+run_dump_test "pr25416-2a"
+run_dump_test "pr25416-2b"
+run_dump_test "pr25416-3"
+run_dump_test "pr25416-4"
 
 if { ![istarget "x86_64-*-linux*"] && ![istarget "x86_64-*-nacl*"]} {
     return
@@ -1302,6 +1308,51 @@ if { [isnative] && [check_compiler_available] } {
     ]
 
     if  {[istarget "x86_64-*-linux*-gnux32"]} {
+	run_cc_link_tests [list \
+	    [list \
+		"Build pr25416-5b.o (GDesc -maddress-mode=short)" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5b.s } \
+	    ] \
+	    [list \
+		"Build pr25416-5b.so (GDesc -> IE -maddress-mode=short)" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-5b.s pr25416-5d.s } \
+		{} \
+		"pr25416-5b.so" \
+	    ] \
+	    [list \
+		"Build pr25416-5c.o (GDesc -maddress-mode=long)" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5c.s } \
+	    ] \
+	    [list \
+		"Build pr25416-5c.so (GDesc -> IE -maddress-mode=long)" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-5c.s pr25416-5d.s } \
+		{} \
+		"pr25416-5c.so" \
+	    ] \
+	    [list \
+		"Build pr25416-5d.so (GDesc -maddress-mode=short)" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-5b.s pr25416-5e.s } \
+		{} \
+		"pr25416-5d.so" \
+	    ] \
+	    [list \
+		"Build pr25416-5d.o (IE -maddress-mode=short)" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5d.s } \
+	    ] \
+	]
+
 	run_ld_link_exec_tests [list \
 	    [list \
 		"Run pr22001-1b" \
@@ -1321,6 +1372,67 @@ if { [isnative] && [check_compiler_available] } {
 		"pass.out" \
 		"$NOPIE_CFLAGS" \
 	    ] \
+	    [list \
+		"Run pr25416-5a (GDesc -> IE -maddress-mode=short)" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-5b.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5a" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5b (GDesc -> LE -maddress-mode=short)" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-5b.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5b" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5c (GDesc -> IE -maddress-mode=long)" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-5c.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5c" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5d (GDesc -> LE -maddress-mode=long)" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-5c.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5d" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5e (GDesc -maddress-mode=short)" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-5d.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5e" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-5f (PIE GDesc -> LE -maddress-mode=short)" \
+		"-pie tmpdir/pr25416-5b.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5f" \
+		"pass.out" \
+	    ] \
+	    [list \
+		"Run pr25416-5g (PIE GDesc -> LE -maddress-mode=long)" \
+		"-pie tmpdir/pr25416-5c.o tmpdir/pr25416-5d.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-5a.c } \
+		"pr25416-5g" \
+		"pass.out" \
+	    ] \
 	]
     } else {
 	run_cc_link_tests [list \
Jan Beulich Jan. 20, 2020, 3:18 p.m. | #4
On 20.01.2020 15:55, H.J. Lu wrote:
> We need to support both sequences for x32.  For both GDesc -> IE/LE

> transitions,

> 

> 67 ff 10		call	*x@TLSCALL(%eax)

> 

> should relaxed to

> 

> 0f 1f 00		nopl	(%rax)


Since, like it is phrased here, the patch also uses this
unconditionally, I wonder if this is the best choice: Linux,
for example, has a number of cases where it wouldn't use
this NOP form even on 64-bit.

Jan
H.J. Lu Jan. 20, 2020, 3:28 p.m. | #5
On Mon, Jan 20, 2020 at 7:18 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> On 20.01.2020 15:55, H.J. Lu wrote:

> > We need to support both sequences for x32.  For both GDesc -> IE/LE

> > transitions,

> >

> > 67 ff 10              call    *x@TLSCALL(%eax)

> >

> > should relaxed to

> >

> > 0f 1f 00              nopl    (%rax)

>

> Since, like it is phrased here, the patch also uses this

> unconditionally, I wonder if this is the best choice: Linux,

> for example, has a number of cases where it wouldn't use

> this NOP form even on 64-bit.

>


Assembler has been using this 3-byte NOP in 64-bit for
a long time.  What other options are you suggesting?


-- 
H.J.
Jan Beulich Jan. 20, 2020, 3:38 p.m. | #6
On 20.01.2020 16:28, H.J. Lu wrote:
> On Mon, Jan 20, 2020 at 7:18 AM Jan Beulich <jbeulich@suse.com> wrote:

>>

>> On 20.01.2020 15:55, H.J. Lu wrote:

>>> We need to support both sequences for x32.  For both GDesc -> IE/LE

>>> transitions,

>>>

>>> 67 ff 10              call    *x@TLSCALL(%eax)

>>>

>>> should relaxed to

>>>

>>> 0f 1f 00              nopl    (%rax)

>>

>> Since, like it is phrased here, the patch also uses this

>> unconditionally, I wonder if this is the best choice: Linux,

>> for example, has a number of cases where it wouldn't use

>> this NOP form even on 64-bit.

>>

> 

> Assembler has been using this 3-byte NOP in 64-bit for

> a long time.  What other options are you suggesting?


The traditional K8 NOPs (up to three [iirc] 0x66 followed by
0x90), and preferably controllable in some way.

Jan
H.J. Lu Jan. 20, 2020, 3:49 p.m. | #7
On Mon, Jan 20, 2020 at 7:38 AM Jan Beulich <jbeulich@suse.com> wrote:
>

> On 20.01.2020 16:28, H.J. Lu wrote:

> > On Mon, Jan 20, 2020 at 7:18 AM Jan Beulich <jbeulich@suse.com> wrote:

> >>

> >> On 20.01.2020 15:55, H.J. Lu wrote:

> >>> We need to support both sequences for x32.  For both GDesc -> IE/LE

> >>> transitions,

> >>>

> >>> 67 ff 10              call    *x@TLSCALL(%eax)

> >>>

> >>> should relaxed to

> >>>

> >>> 0f 1f 00              nopl    (%rax)

> >>

> >> Since, like it is phrased here, the patch also uses this

> >> unconditionally, I wonder if this is the best choice: Linux,

> >> for example, has a number of cases where it wouldn't use

> >> this NOP form even on 64-bit.

> >>

> >

> > Assembler has been using this 3-byte NOP in 64-bit for

> > a long time.  What other options are you suggesting?

>

> The traditional K8 NOPs (up to three [iirc] 0x66 followed by

> 0x90), and preferably controllable in some way.

>


We have been doing this way for many years.  I don't think
we should change it now.

-- 
H.J.
Jan Beulich Jan. 20, 2020, 3:51 p.m. | #8
On 20.01.2020 16:49, H.J. Lu wrote:
> On Mon, Jan 20, 2020 at 7:38 AM Jan Beulich <jbeulich@suse.com> wrote:

>>

>> On 20.01.2020 16:28, H.J. Lu wrote:

>>> On Mon, Jan 20, 2020 at 7:18 AM Jan Beulich <jbeulich@suse.com> wrote:

>>>>

>>>> On 20.01.2020 15:55, H.J. Lu wrote:

>>>>> We need to support both sequences for x32.  For both GDesc -> IE/LE

>>>>> transitions,

>>>>>

>>>>> 67 ff 10              call    *x@TLSCALL(%eax)

>>>>>

>>>>> should relaxed to

>>>>>

>>>>> 0f 1f 00              nopl    (%rax)

>>>>

>>>> Since, like it is phrased here, the patch also uses this

>>>> unconditionally, I wonder if this is the best choice: Linux,

>>>> for example, has a number of cases where it wouldn't use

>>>> this NOP form even on 64-bit.

>>>>

>>>

>>> Assembler has been using this 3-byte NOP in 64-bit for

>>> a long time.  What other options are you suggesting?

>>

>> The traditional K8 NOPs (up to three [iirc] 0x66 followed by

>> 0x90), and preferably controllable in some way.

>>

> 

> We have been doing this way for many years.  I don't think

> we should change it now.


Fair enough then.

Jan

Patch

diff --git a/bfd/elf64-x86-64.c b/bfd/elf64-x86-64.c
index 79e68ff4767..9fe8f163e51 100644
--- a/bfd/elf64-x86-64.c
+++ b/bfd/elf64-x86-64.c
@@ -1030,6 +1030,7 @@  elf_x86_64_check_tls_transition (bfd *abfd,
   bfd_boolean largepic = FALSE;
   struct elf_link_hash_entry *h;
   bfd_vma offset;
+  unsigned int prefix;
   struct elf_x86_link_hash_table *htab;
   bfd_byte *call;
   bfd_boolean indirect_call;
@@ -1223,18 +1224,24 @@  elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_GOTPC32_TLSDESC:
       /* Check transition from GDesc access model:
-		leaq x@tlsdesc(%rip), %rax
+		leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 	 Make sure it's a leaq adding rip to a 32-bit offset
 	 into any register, although it's probably almost always
 	 going to be rax.  */
 
-      if (offset < 3 || (offset + 4) > sec->size)
+      prefix = ABI_64_P (abfd) ? 1 : 0;
+      if (offset < (2 + prefix)
+	  || (offset + 4) > sec->size)
 	return FALSE;
 
-      val = bfd_get_8 (abfd, contents + offset - 3);
-      if ((val & 0xfb) != 0x48)
-	return FALSE;
+      if (prefix)
+	{
+	  val = bfd_get_8 (abfd, contents + offset - 3);
+	  if ((val & 0xfb) != 0x48)
+	    return FALSE;
+	}
 
       if (bfd_get_8 (abfd, contents + offset - 2) != 0x8d)
 	return FALSE;
@@ -1244,13 +1251,25 @@  elf_x86_64_check_tls_transition (bfd *abfd,
 
     case R_X86_64_TLSDESC_CALL:
       /* Check transition from GDesc access model:
-		call *x@tlsdesc(%rax)
+		call *x@tlsdesc(%rax) <--- LP64 mode.
+		call *x@tlsdesc(%eax) <--- X32 mode.
        */
       if (offset + 2 <= sec->size)
 	{
-	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
 	  call = contents + offset;
-	  return call[0] == 0xff && call[1] == 0x10;
+	  prefix = 0;
+	  if (!ABI_64_P (abfd))
+	    {
+	      /* Check for call *x@tlsdesc(%eax).  */
+	      if (call[0] == 0x67)
+		{
+		  prefix = 1;
+		  if (offset + 3 > sec->size)
+		    return FALSE;
+		}
+	    }
+	  /* Make sure that it's a call *x@tlsdesc(%rax).  */
+	  return call[prefix] == 0xff && call[1 + prefix] == 0x10;
 	}
 
       return FALSE;
@@ -3401,19 +3420,27 @@  corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally something like:
-		     leaq x@tlsdesc(%rip), %rax
+		     leaq x@tlsdesc(%rip), %rax <--- LP64 mode.
+		     leal x@tlsdesc(%rip), %eax <--- X32 mode.
 
 		     Change it to:
-		     movl $x@tpoff, %rax.  */
+		     movq $x@tpoff, %rax <--- LP64 mode.
+		     movl $x@tpoff, %eax <--- X32 mode.
+		   */
 
-		  unsigned int val, type;
+		  unsigned int val, prefix;
 
-		  if (roff < 3)
+		  prefix = ABI_64_P (input_bfd) ? 1 : 0;
+		  if (roff < (2 + prefix))
 		    goto corrupt_input;
-		  type = bfd_get_8 (input_bfd, contents + roff - 3);
+		  if (prefix)
+		    {
+		      unsigned int type;
+		      type = bfd_get_8 (input_bfd, contents + roff - 3);
+		      bfd_put_8 (output_bfd, 0x48 | ((type >> 2) & 1),
+				 contents + roff - 3);
+		    }
 		  val = bfd_get_8 (input_bfd, contents + roff - 1);
-		  bfd_put_8 (output_bfd, 0x48 | ((type >> 2) & 1),
-			     contents + roff - 3);
 		  bfd_put_8 (output_bfd, 0xc7, contents + roff - 2);
 		  bfd_put_8 (output_bfd, 0xc0 | ((val >> 3) & 7),
 			     contents + roff - 1);
@@ -3426,11 +3453,30 @@  corrupt_input:
 		{
 		  /* GDesc -> LE transition.
 		     It's originally:
-		     call *(%rax)
+		     call *(%rax) <--- LP64 mode.
+		     call *(%eax) <--- X32 mode.
 		     Turn it into:
-		     xchg %ax,%ax.  */
-		  bfd_put_8 (output_bfd, 0x66, contents + roff);
-		  bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		     xchg %ax,%ax <-- LP64 mode.
+		     nopl (%rax)  <-- X32 mode.
+		   */
+		  unsigned int prefix = 0;
+		  if (!ABI_64_P (input_bfd))
+		    {
+		      /* Check for call *x@tlsdesc(%eax).  */
+		      if (contents[roff] == 0x67)
+			prefix = 1;
+		    }
+		  if (prefix)
+		    {
+		      bfd_put_8 (output_bfd, 0x0f, contents + roff);
+		      bfd_put_8 (output_bfd, 0x1f, contents + roff + 1);
+		      bfd_put_8 (output_bfd, 0x00, contents + roff + 2);
+		    }
+		  else
+		    {
+		      bfd_put_8 (output_bfd, 0x66, contents + roff);
+		      bfd_put_8 (output_bfd, 0x90, contents + roff + 1);
+		    }
 		  continue;
 		}
 	      else if (r_type == R_X86_64_GOTTPOFF)
diff --git a/ld/testsuite/ld-x86-64/pr25416-1.d b/ld/testsuite/ld-x86-64/pr25416-1.d
new file mode 100644
index 00000000000..39854cd3510
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1.d
@@ -0,0 +1,13 @@ 
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	c7 c0 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+mov    \$0x[a-f0-9]+,%eax
+ +[a-f0-9]+:	0f 1f 00             	nopl   \(%rax\)
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-1.s b/ld/testsuite/ld-x86-64/pr25416-1.s
new file mode 100644
index 00000000000..66636c12981
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-1.s
@@ -0,0 +1,13 @@ 
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-2.d b/ld/testsuite/ld-x86-64/pr25416-2.d
new file mode 100644
index 00000000000..e60c8222474
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2.d
@@ -0,0 +1,13 @@ 
+#as: --x32
+#ld: -melf32_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[a-f0-9]+ <_start>:
+ +[a-f0-9]+:	48 c7 c0 [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f] [0-9a-f][0-9a-f][ \t]+mov    \$0x[a-f0-9]+,%rax
+ +[a-f0-9]+:	66 90                	xchg   %ax,%ax
+#pass
diff --git a/ld/testsuite/ld-x86-64/pr25416-2.s b/ld/testsuite/ld-x86-64/pr25416-2.s
new file mode 100644
index 00000000000..b6dbb6d93ad
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-2.s
@@ -0,0 +1,13 @@ 
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-3a.c b/ld/testsuite/ld-x86-64/pr25416-3a.c
new file mode 100644
index 00000000000..521c13b38a8
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3a.c
@@ -0,0 +1,27 @@ 
+#include <stdlib.h>
+#include <stdio.h>
+
+__thread int bar = 301;
+
+extern int *test1 (int);
+extern int *test2 (int);
+
+int
+main ()
+{
+  int *p;
+  p = test1 (30);
+  if (*p != 30)
+    abort ();
+  *p = 40;
+  test1 (40);
+  p = test2 (301);
+  if (*p != 301)
+    abort ();
+  if (p != &bar)
+    abort ();
+  *p = 40;
+  test2 (40);
+  puts ("PASS");
+  return 0;
+}
diff --git a/ld/testsuite/ld-x86-64/pr25416-3b.s b/ld/testsuite/ld-x86-64/pr25416-3b.s
new file mode 100644
index 00000000000..95a0226aa42
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3b.s
@@ -0,0 +1,50 @@ 
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %eax
+	call	*foo@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subl	$8, %esp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %eax
+	call	*bar@TLSCALL(%eax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	addl	$8, %esp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/pr25416-3c.s b/ld/testsuite/ld-x86-64/pr25416-3c.s
new file mode 100644
index 00000000000..94a64bad9ea
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/pr25416-3c.s
@@ -0,0 +1,54 @@ 
+	.text
+	.p2align 4
+	.globl	test1
+	.type	test1, @function
+test1:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	foo@TLSDESC(%rip), %rax
+	call	*foo@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L5
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L5:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test1, .-test1
+	.p2align 4
+	.globl	test2
+	.type	test2, @function
+test2:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+	lea	bar@TLSDESC(%rip), %rax
+	call	*bar@TLSCALL(%rax)
+	addl	%fs:0, %eax
+	cmpl	%edi, (%eax)
+	jne	.L9
+	movl	%eax, %r8d
+	addq	$8, %rsp
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+	movq	%r8, %rax
+	ret
+.L9:
+	.cfi_restore_state
+	call	abort@PLT
+	.cfi_endproc
+	.size	test2, .-test2
+	.section	.tdata,"awT",@progbits
+	.align 4
+	.type	foo, @object
+	.size	foo, 4
+foo:
+	.long	30
+	.section	.note.GNU-stack,"",@progbits
diff --git a/ld/testsuite/ld-x86-64/x86-64.exp b/ld/testsuite/ld-x86-64/x86-64.exp
index 88f75e0e431..535b65f7bdf 100644
--- a/ld/testsuite/ld-x86-64/x86-64.exp
+++ b/ld/testsuite/ld-x86-64/x86-64.exp
@@ -461,6 +461,8 @@  run_dump_test "pr24721-x32"
 run_dump_test "pr24905"
 run_dump_test "pr24905-x32"
 run_dump_test "align-branch-1"
+run_dump_test "pr25416-1"
+run_dump_test "pr25416-2"
 
 if { ![istarget "x86_64-*-linux*"] && ![istarget "x86_64-*-nacl*"]} {
     return
@@ -1302,6 +1304,37 @@  if { [isnative] && [check_compiler_available] } {
     ]
 
     if  {[istarget "x86_64-*-linux*-gnux32"]} {
+	run_cc_link_tests [list \
+	    [list \
+		"Build pr25416-3b.o" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3b.s } \
+	    ] \
+	    [list \
+		"Build pr25416-3b.so" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-3b.s } \
+		{} \
+		"pr25416-3b.so" \
+	    ] \
+	    [list \
+		"Build pr25416-3c.o" \
+		"" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3c.s } \
+	    ] \
+	    [list \
+		"Build pr25416-3c.so" \
+		"-shared" \
+		"-fPIC -Wa,-mx86-used-note=yes" \
+		{ pr25416-3c.s } \
+		{} \
+		"pr25416-3c.so" \
+	    ] \
+	]
+
 	run_ld_link_exec_tests [list \
 	    [list \
 		"Run pr22001-1b" \
@@ -1321,6 +1354,42 @@  if { [isnative] && [check_compiler_available] } {
 		"pass.out" \
 		"$NOPIE_CFLAGS" \
 	    ] \
+	    [list \
+		"Run pr25416-3a" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-3b.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3a" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-3b" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-3b.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3b" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-3c" \
+		"$NOPIE_LDFLAGS -Wl,--no-as-needed tmpdir/pr25416-3c.so" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3c" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
+	    [list \
+		"Run pr25416-3d" \
+		"$NOPIE_LDFLAGS tmpdir/pr25416-3c.o" \
+		"-Wa,-mx86-used-note=yes" \
+		{ pr25416-3a.c } \
+		"pr25416-3d" \
+		"pass.out" \
+		"$NOPIE_CFLAGS" \
+	    ] \
 	]
     } else {
 	run_cc_link_tests [list \