[committed,amdgcn] Implement extract_last and fold_extract_last

Message ID 112f1eef-73b5-dc07-7523-1a1d570e401b@codesourcery.com
State New
Headers show
Series
  • [committed,amdgcn] Implement extract_last and fold_extract_last
Related show

Commit Message

Andrew Stubbs Dec. 17, 2019, 1:08 p.m.
This patch implements the vector extract last instruction patterns in 
the AMD GCN backend.

This is both an optimization and a "fix" for pr92772, in which the 
conditional reduction algorithm is broken for architectures with masked 
vectors.

This fixes too many testcase failures in vect.exp to name them all 
individually, but they include vect-cond_reduc-* and pr65947-10.c.

Andrew Stubbs
Mentor Graphics / CodeSourcery

Patch

Add extract_last for amdgcn

2019-12-17  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn-valu.md (extract_last_<mode>): New expander.
	(fold_extract_last_<mode>): New expander.

	gcc/testsuite/
	* lib/target-supports.exp
	(check_effective_target_vect_fold_extract_last): Add amdgcn.

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 42604466161..3b3be8a9e36 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -591,6 +591,48 @@ 
    (set_attr "exec" "none")
    (set_attr "laneselect" "yes")])
 
+(define_expand "extract_last_<mode>"	; Extract the vector element indexed by the highest set bit of the mask.
+  [(match_operand:<SCALAR_MODE> 0 "register_operand")	; 0: scalar destination
+   (match_operand:DI 1 "gcn_alu_operand")	; 1: 64-bit mask
+   (match_operand:VEC_ALLREG_MODE 2 "register_operand")]	; 2: source vector
+  "can_create_pseudo_p ()"	; the expansion allocates a new SImode pseudo
+  {  /* NOTE(review): the index computed for an all-zero mask depends on what clz returns for 0 -- confirm; fold_extract_last_<mode> branches around this expansion when the mask is zero.  */
+    rtx dst = operands[0];
+    rtx mask = operands[1];
+    rtx vect = operands[2];
+    rtx tmpreg = gen_reg_rtx (SImode);  /* Holds the computed element index.  */
+
+    emit_insn (gen_clzdi2 (tmpreg, mask));  /* tmpreg = count of leading zero bits in the mask.  */
+    emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));  /* tmpreg = 63 - clz, i.e. the bit position of the highest set bit.  */
+    emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));  /* dst = vect[tmpreg].  */
+    DONE;
+  })
+
+(define_expand "fold_extract_last_<mode>"	; As extract_last_<mode>, but yield operand 1 when the mask is zero.
+  [(match_operand:<SCALAR_MODE> 0 "register_operand")	; 0: scalar destination
+   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")	; 1: value stored when the mask is zero
+   (match_operand:DI 2 "gcn_alu_operand")	; 2: 64-bit mask
+   (match_operand:VEC_ALLREG_MODE 3 "register_operand")]	; 3: source vector
+  "can_create_pseudo_p ()"
+  {
+    rtx dst = operands[0];
+    rtx default_value = operands[1];
+    rtx mask = operands[2];
+    rtx vect = operands[3];
+    rtx else_label = gen_label_rtx ();  /* Target for the mask == 0 path.  */
+    rtx end_label = gen_label_rtx ();  /* Join point after both paths.  */
+
+    rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);  /* Comparison: mask == 0.  */
+    emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));  /* If mask == 0, go to the default-value path.  */
+    emit_insn (gen_extract_last_<mode> (dst, mask, vect));  /* Fall-through path: mask is non-zero here.  */
+    emit_jump_insn (gen_jump (end_label));  /* Skip over the default-value path.  */
+    emit_barrier ();  /* Barrier emitted after the unconditional jump.  */
+    emit_label (else_label);
+    emit_move_insn (dst, default_value);  /* mask == 0: result is operand 1.  */
+    emit_label (end_label);
+    DONE;
+  })
+
 (define_expand "vec_init<mode><scalar_mode>"
   [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
    (match_operand 1)]
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 80e9d6720bd..98f1141a8a4 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6974,7 +6974,8 @@  proc check_effective_target_vect_logical_reduc { } {
 # Return 1 if the target supports the fold_extract_last optab.
 
 proc check_effective_target_vect_fold_extract_last { } {
-    return [check_effective_target_aarch64_sve]
+    return [expr { [check_effective_target_aarch64_sve]
+		   || [istarget amdgcn*-*-*] }]
 }
 
 # Return 1 if the target supports section-anchors