[committed,amdgcn] Implement clz and ctz

Message ID f2cb0d8a-0ce5-bb84-26d7-7bbe114e201a@codesourcery.com
State New
Headers show
Series
  • [committed,amdgcn] Implement clz and ctz
Related show

Commit Message

Andrew Stubbs Dec. 17, 2019, 1:01 p.m.
This patch implements the count leading and trailing zeros instruction 
patterns in the AMD GCN backend.

This is a prerequisite for implementing the extract_last patterns.

Andrew Stubbs
Mentor Graphics / CodeSourcery

Patch

Add clz and ctz for amdgcn

2019-12-17  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn.h (CLZ_DEFINED_VALUE_AT_ZERO): Define.
	(CTZ_DEFINED_VALUE_AT_ZERO): Define.
	* config/gcn/gcn.md (s_mnemonic): Add clz and ctz.
	(expander): Likewise.
	(countzeros): New code iterator.
	(<expander>si2): New insn pattern.
	(<expander>di2): New insn pattern.

diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index bdf7188b5ff..76b449ba5cf 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -644,6 +644,10 @@  enum gcn_builtin_codes
 /* This needs to match gcn_function_value.  */
 #define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, SGPR_REGNO (RETURN_VALUE_REG))
 
+/* The s_ff1 and s_flbit instructions return -1 if no input bits are set.  */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2)
+
 
 /* Costs.  */
 
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 36908ba45f6..b48af0dbde8 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -331,7 +331,9 @@ 
 
 (define_code_attr s_mnemonic
   [(not "not%b")
-   (popcount "bcnt1_i32%b")])
+   (popcount "bcnt1_i32%b")
+   (clz "flbit_i32%b")
+   (ctz "ff1_i32%b")])
 
 (define_code_attr revmnemonic
   [(minus "subrev%i")
@@ -356,7 +358,9 @@ 
    (umin "umin")
    (umax "umax")
    (not "one_cmpl")
-   (popcount "popcount")])
+   (popcount "popcount")
+   (clz "clz")
+   (ctz "ctz")])
 
 ;; }}}
 ;; {{{ Miscellaneous instructions
@@ -1389,6 +1393,28 @@ 
   [(set_attr "type" "sop1,vop1")
    (set_attr "length" "8")])
 
+(define_code_iterator countzeros [clz ctz])
+
+(define_insn "<expander>si2"
+  [(set (match_operand:SI 0 "register_operand"  "=Sg,Sg")
+        (countzeros:SI
+	  (match_operand:SI 1 "gcn_alu_operand" "SgA, B")))]
+  ""
+  "s_<s_mnemonic>1\t%0, %1"
+  [(set_attr "type" "sop1")
+   (set_attr "length" "4,8")])
+
+; The truncate ensures that a constant passed to operand 1 is treated as DImode
+(define_insn "<expander>di2"
+  [(set (match_operand:SI 0 "register_operand"    "=Sg,Sg")
+	(truncate:SI
+	  (countzeros:DI
+	    (match_operand:DI 1 "gcn_alu_operand" "SgA, B"))))]
+  ""
+  "s_<s_mnemonic>1\t%0, %1"
+  [(set_attr "type" "sop1")
+   (set_attr "length" "4,8")])
+
 ;; }}}
 ;; {{{ ALU: generic 32-bit binop