rl78 umindi3 improvement

Message ID 000601d37027$40e2d7c0$c2a88740$@renesas.com
State New
Headers show
Series
  • rl78 umindi3 improvement
Related show

Commit Message

Sebastian Perta Dec. 8, 2017, 1:20 p.m.
Hello,

The following patch improves both the speed and the code size of the 64-bit
unsigned min for RL78:
it emits a library function call instead of emitting inline code for the 64-bit
min every single time.
The unsigned min function which was added to libgcc is hand-written, so it is
more efficient than what GCC generates.

The change can easily be seen on the following test case:
/* Returns the smaller of two unsigned 64-bit values.
   Note: despite the "smax" in its name, this is an unsigned minimum.  */
unsigned long long
my_smaxdi3(unsigned long long x, unsigned long long y)
{
  return (y <= x)
         ? y
         : x;
}
I did not add this to the regression tests as it is very simple and there are
already test cases in the regression suite which cover this, for example
gcc.c-torture/execute/pr49039.c and gcc.dg/torture/pr25718-1.c.
Regression test is OK, tested with the following command:
make -k check-gcc RUNTESTFLAGS=--target_board=rl78-sim

Please let me know if this is OK, Thank you!
Sebastian

+END_FUNC ___umindi3

Patch

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 255471)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,7 @@ 
+2017-12-07  Sebastian Perta  <sebastian.perta@renesas.com>
+
+	* config/rl78/rl78.md: New define_expand "umindi3".
+	
 2017-12-07  Vladimir Makarov  <vmakarov@redhat.com>
 
 	PR target/83252
Index: gcc/config/rl78/rl78.md
===================================================================
--- gcc/config/rl78/rl78.md	(revision 255471)
+++ gcc/config/rl78/rl78.md	(working copy)
@@ -234,6 +234,16 @@ 
    DONE;"
 )
 
+(define_expand "umindi3"  ;; DImode unsigned minimum; operands are handed to rl78_emit_libcall below
+ [(set (match_operand:DI          0 "nonimmediate_operand" "")  ;; operand 0: destination
+	(umin:DI (match_operand:DI 1 "general_operand"      "")  ;; operand 1: first input
+		 (match_operand:DI    2 "general_operand"      "")))  ;; operand 2: second input
+   ]
+  "optimize_size"  ;; expander condition: this expansion is used only when optimize_size is nonzero
+  "rl78_emit_libcall (\"__umindi3\", UMIN, DImode, DImode, 3, operands);
+   DONE;"
+)  ;; the preparation code names the helper __umindi3; DONE ends the expansion so the template is not emitted
+
 (define_insn "addsi3_internal_virt"
   [(set (match_operand:SI          0 "nonimmediate_operand" "=v,&vm, vm")
 	(plus:SI (match_operand:SI 1 "general_operand"      "0, vim, vim")
Index: libgcc/ChangeLog
===================================================================
--- libgcc/ChangeLog	(revision 255471)
+++ libgcc/ChangeLog	(working copy)
@@ -1,3 +1,8 @@ 
+2017-12-07  Sebastian Perta  <sebastian.perta@renesas.com>
+
+	* config/rl78/umindi3.S: New assembly file.
+	* config/rl78/t-rl78: Added umindi3.S to LIB2ADD.
+	
 2017-11-30  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	* config/rs6000/_mulkc3.c (__mulkc3): Add forward declaration.
Index: libgcc/config/rl78/t-rl78
===================================================================
--- libgcc/config/rl78/t-rl78	(revision 255471)
+++ libgcc/config/rl78/t-rl78	(working copy)
@@ -32,7 +32,8 @@ 
 	$(srcdir)/config/rl78/fpmath-sf.S \
 	$(srcdir)/config/rl78/cmpsi2.S \
 	$(srcdir)/config/rl78/adddi3.S \
-	$(srcdir)/config/rl78/subdi3.S
+	$(srcdir)/config/rl78/subdi3.S \
+	$(srcdir)/config/rl78/umindi3.S
 
 LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \
   _popcounthi2 _popcountsi2 \
Index: libgcc/config/rl78/umindi3.S
===================================================================
--- libgcc/config/rl78/umindi3.S	(nonexistent)
+++ libgcc/config/rl78/umindi3.S	(working copy)
@@ -0,0 +1,87 @@ 
+;   Copyright (C) 2017 Free Software Foundation, Inc.
+;   Contributed by Sebastian Perta.
+; 
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+; 
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+; 
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+; <http://www.gnu.org/licenses/>.
+
+
+#include "vregs.h"
+
+    .text
+
+; ___umindi3: unsigned 64-bit minimum of two operands, a and b.
+;
+; In:    a at [sp+4]..[sp+11] and b at [sp+12]..[sp+19]; within each
+;        operand the word at the lowest address is the least significant.
+; Out:   r8, r10, r12, r14 = the smaller operand (r8 least significant
+;        word, r14 most significant word).
+; Clobb: ax and the flags.
+
+START_FUNC ___umindi3
+
+    ; assume a is the minimum: copy it to the output registers
+    movw   ax, [sp+4]
+    movw   r8, ax
+    movw   ax, [sp+6]
+    movw   r10, ax
+    movw   ax, [sp+8]
+    movw   r12, ax
+    movw   ax, [sp+10]
+    movw   r14, ax
+
+    ; Compare b with a using 16-bit compares, from the most significant
+    ; words down to the least significant ones.  After each cmpw:
+    ;   bc  taken -> the b word is below the a word, so b is the minimum
+    ;   bnz taken -> the b word is above the a word, so a (already copied) stays
+    ;   neither   -> the words are equal and the next lower word decides
+    movw   ax, [sp+18]
+    cmpw   ax, r14
+    bc     $.L1
+    bnz    $.L2
+
+    movw   ax, [sp+16]
+    cmpw   ax, r12
+    bc     $.L1
+    bnz    $.L2
+
+    movw   ax, [sp+14]
+    cmpw   ax, r10
+    bc     $.L1
+    bnz    $.L2
+
+    ; least significant word: if b is not below a here, a is the result
+    movw   ax, [sp+12]
+    cmpw   ax, r8
+    bc     $.L1
+    ret
+
+.L1:
+    ; b is the minimum: overwrite the output registers with it
+    movw   ax, [sp+12]
+    movw   r8, ax
+    movw   ax, [sp+14]
+    movw   r10, ax
+    movw   ax, [sp+16]
+    movw   r12, ax
+    movw   ax, [sp+18]
+    movw   r14, ax
+.L2:
+    ret
+
+END_FUNC ___umindi3