[committed,amdgcn] Zero-initialise masked load destinations

Message ID 3a6e27ba-aad7-8725-a036-6a61ff42fee3@codesourcery.com
State New
Headers show
Series
  • [committed,amdgcn] Zero-initialise masked load destinations
Related show

Commit Message

Andrew Stubbs Jan. 31, 2020, 11:20 a.m.
This is one of those things I don't know why we didn't notice sooner. 
The patch ensures that unused lanes in masked vector loads are 
zero-initialized, as per the internals manual.

This fixes an execution failure in testcase gfortran.dg/assumed_rank_1.f90.

When investigating the bug I got confused about the meaning of the 
"gather<mode>_exec" define_expand, which doesn't quite fit the pattern 
of the other "_exec" instructions. It's only used in one place so I've 
inlined it to avoid future confusion. It also reduces the likelihood of 
accidentally bypassing the zero-initialization in future.

I also needed a convenient way to create 0.0 vector constants without 
uglifying the machine description code, so extending gcn_vec_constant 
seemed like a useful place to do it.

Andrew

Patch

Zero-initialise masked load destinations

Fixes an execution failure in testcase gfortran.dg/assumed_rank_1.f90.

2020-01-30  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn-valu.md (gather<mode>_exec): Move contents ...
	(mask_gather_load<mode>): ... here, and zero-initialize the
	destination.
	(maskload<mode>di): Zero-initialize the destination.
	* config/gcn/gcn.c (gcn_vec_constant): Support floating-point modes.

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 331c768cb88..4aad835b2ef 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -701,34 +701,6 @@ 
     DONE;
   })
 
-(define_expand "gather<mode>_exec"
-  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
-   (match_operand:DI 1 "register_operand")
-   (match_operand:V64SI 2 "register_operand")
-   (match_operand 3 "immediate_operand")
-   (match_operand:SI 4 "gcn_alu_operand")
-   (match_operand:DI 5 "gcn_exec_reg_operand")]
-  ""
-  {
-    rtx undefmode = gcn_gen_undef (<MODE>mode);
-
-    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
-					  operands[2], operands[4],
-					  INTVAL (operands[3]), operands[5]);
-
-    if (GET_MODE (addr) == V64DImode)
-      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
-						     const0_rtx, const0_rtx,
-						     const0_rtx, undefmode,
-						     operands[5]));
-    else
-      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
-						      addr, const0_rtx,
-						      const0_rtx, const0_rtx,
-						      undefmode, operands[5]));
-    DONE;
-  })
-
 ; Allow any address expression
 (define_expand "gather<mode>_expr<exec>"
   [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
@@ -2801,9 +2773,12 @@ 
 		(<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
     rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
     rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
-    rtx undef = gcn_gen_undef (<MODE>mode);
-    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
-					   exec));
+
+    /* Masked lanes are required to hold zero.  */
+    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+
+    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
+					   operands[0], exec));
     DONE;
   })
 
@@ -2843,8 +2818,23 @@ 
 	operands[2] = tmp;
       }
 
-    emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
-				      operands[3], operands[4], exec));
+    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+					  operands[2], operands[4],
+					  INTVAL (operands[3]), exec);
+
+    /* Masked lanes are required to hold zero.  */
+    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+
+    if (GET_MODE (addr) == V64DImode)
+      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+						     const0_rtx, const0_rtx,
+						     const0_rtx, operands[0],
+						     exec));
+    else
+      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
+						      addr, const0_rtx,
+						      const0_rtx, const0_rtx,
+						      operands[0], exec));
     DONE;
   })
 
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index a39e9f3fbd6..16c3aa2567e 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -992,9 +992,19 @@  gcn_vec_constant (machine_mode mode, int a)
     return CONST2_RTX (mode);*/
 
   int units = GET_MODE_NUNITS (mode);
-  rtx tem = gen_int_mode (a, GET_MODE_INNER (mode));
-  rtvec v = rtvec_alloc (units);
+  machine_mode innermode = GET_MODE_INNER (mode);
+
+  rtx tem;
+  if (FLOAT_MODE_P (innermode))
+    {
+      REAL_VALUE_TYPE rv;
+      real_from_integer (&rv, NULL, a, SIGNED);
+      tem = const_double_from_real_value (rv, innermode);
+    }
+  else
+    tem = gen_int_mode (a, innermode);
 
+  rtvec v = rtvec_alloc (units);
   for (int i = 0; i < units; ++i)
     RTVEC_ELT (v, i) = tem;