[05/31] VAX: Rationalize expression and address costs

Message ID alpine.LFD.2.21.2011200235010.656242@eddie.linux-mips.org
State New
Headers show
Series
  • VAX: Bring the port up to date (yes, MODE_CC conversion is included)
Related show

Commit Message

Maciej W. Rozycki Nov. 20, 2020, 3:34 a.m.
Expression costs are required to be given in terms of COSTS_N_INSNS (n),
which is defined to stand for the count of single fast instructions, and
actually returns `n * 4'.  The VAX backend however instead operates on
naked numbers, causing an anomaly for the integer const zero rtx, where
the cost given is 4 as opposed to 1 for integers in the [1:63] range, as
well as -1 for comparisons.  This is because the value of 0 returned by
`vax_rtx_costs' is converted to COSTS_N_INSNS (1) in `pattern_cost':

  return cost > 0 ? cost : COSTS_N_INSNS (1);

Consequently, where feasible, 1 or -1 are preferred over 0 by the middle
end causing code pessimization, e.g. rather than producing this:

	subl2 $4,%sp
	movl 4(%ap),%r0
	jgtr .L2
	addl2 $2,%r0
.L2:
	ret

or this:

	subl2 $4,%sp
	addl3 4(%ap),8(%ap),%r0
	jlss .L6
	addl2 $2,%r0
.L6:
	ret

code is produced like this:

	subl2 $4,%sp
	movl 4(%ap),%r0
	cmpl %r0,$1
	jgeq .L2
	addl2 $2,%r0
.L2:
	ret

or this:

	subl2 $4,%sp
	addl3 4(%ap),8(%ap),%r0
	cmpl %r0,$-1
	jleq .L6
	addl2 $2,%r0
.L6:
	ret

from this:

/* Return X unchanged when it is positive, otherwise X + 2.
   With properly scaled rtx costs the "x > 0" test is expected to
   reuse the condition codes set by the preceding move, producing a
   jgtr/jleq branch with no explicit `cmpl %r0,$1' -- TODO confirm
   against the generated assembly.  */
int
compare_mov (int x)
{
  if (x > 0)
    return x;
  else
    return x + 2;
}

and this:

/* Return the sum of X and Y, plus 2 if that sum is non-negative.
   With properly scaled rtx costs the "z < 0" test is expected to
   reuse the condition codes set by the addition, producing a
   jlss/jgeq branch with no explicit `cmpl %r0,$-1' -- TODO confirm
   against the generated assembly.  */
int
compare_add (int x, int y)
{
  int z;

  z = x + y;
  if (z < 0)
    return z;
  else
    return z + 2;
}

respectively, producing code that is both slower and larger at the same time.

Furthermore once the backend is converted to MODE_CC this anomaly makes
it usually impossible to remove redundant comparisons in the comparison
elimination pass, because most VAX instructions set the condition codes
as per the relation of the instruction's result to 0 and not -1.

The middle end has some other assumptions as to rtx costs being given in
terms of COSTS_N_INSNS, so wrap all the VAX rtx costs then as they stand
into COSTS_N_INSNS invocations, effectively scaling the costs by 4 while
preserving their relative values, except for the integer const zero rtx
given the value of `COSTS_N_INSNS (1) / 2', half of a fast instruction
(this can be further halved if needed in the future).

Adjust address costs likewise so that they remain proportional to the
new absolute values of rtx costs.

Code size stats are as follows, collected from 17639 executables built
in `check-c' GCC testing:

              samples average  median
--------------------------------------
regressions      1420  0.400%  0.195%
unchanged       13811  0.000%  0.000%
progressions     2408 -0.504% -0.201%
--------------------------------------
total           17639 -0.037%  0.000%

with a small number of outliers only (over 5% size change):

old     new     change  %change filename
----------------------------------------------------
4991    5249     258     5.1693 981001-1.exe
2637    2777     140     5.3090 interchange-6.exe
2187    2307     120     5.4869 sprintf.x7
3969    4197     228     5.7445 pr28982a.exe
8264    8816     552     6.6795 vector-compare-1.exe
5199    5575     376     7.2321 pr28982b.exe
2113    2411     298    14.1031 20030323-1.exe
2113    2411     298    14.1031 20030323-1.exe
2113    2411     298    14.1031 20030323-1.exe

so it seems we are looking good, and we have complementing reductions
to compensate:

old     new     change  %change filename
----------------------------------------------------
2919    2631    -288    -9.8663 pr57521.exe
3427    3167    -260    -7.5868 sabd_1.exe
2985    2765    -220    -7.3701 ssad-run.exe
2985    2765    -220    -7.3701 ssad-run.exe
2985    2765    -220    -7.3701 usad-run.exe
2985    2765    -220    -7.3701 usad-run.exe
4509    4253    -256    -5.6775 vshuf-v2sf.exe
4541    4285    -256    -5.6375 vshuf-v2si.exe
4673    4417    -256    -5.4782 vshuf-v2df.exe
2993    2841    -152    -5.0785 abs-2.x4
2993    2841    -152    -5.0785 abs-3.x4

This actually causes `loop-8.c' to regress:

FAIL: gcc.dg/loop-8.c scan-rtl-dump-times loop2_invariant "Decided" 1
FAIL: gcc.dg/loop-8.c scan-rtl-dump-not loop2_invariant "without introducing a new temporary register"

but upon a closer inspection this is a red herring.  Old code looks as
follows:

	.file	"loop-8.c"
	.text
	.align 1
.globl f
	.type	f, @function
f:
	.word 0
	subl2 $4,%sp
	movl 4(%ap),%r2
	movl 8(%ap),%r3
	movl $42,(%r2)
	clrl %r0
	movl $42,%r1
	movl %r1,%r4
	jbr .L2
.L5:
	movl %r4,%r1
.L2:
	movl %r1,(%r3)[%r0]
	incl %r0
	cmpl %r0,$100
	jeql .L6
	movl $42,(%r2)[%r0]
	bicl3 $-2,%r0,%r1
	jeql .L5
	movl %r0,%r1
	jbr .L2
.L6:
	ret
	.size	f, .-f

while new one is like below:

	.file	"loop-8.c"
	.text
	.align 1
.globl f
	.type	f, @function
f:
	.word 0
	subl2 $4,%sp
	movl 4(%ap),%r2
	movl $42,(%r2)+
	movl 8(%ap),%r1
	clrl %r0
	movl $42,%r3
	movzbl $100,%r4
	movl %r3,%r5
	jbr .L2
.L5:
	movl %r5,%r3
.L2:
	movl %r3,(%r1)+
	incl %r0
	cmpl %r0,%r4
	jeql .L6
	movl $42,(%r2)+
	bicl3 $-2,%r0,%r3
	jeql .L5
	movl %r0,%r3
	jbr .L2
.L6:
	ret
	.size	f, .-f

and is clearly better: not only is it smaller, but it also uses the
post-increment rather than indexed addressing mode in the loop, of
which the former comes for free in terms of both performance and code
size while the latter causes an extra byte per operand to be produced
for the index register and also incurs an execution penalty for the
extra address calculation.

Exclude the case from VAX testing then, as already done for some other
targets and discussed with commit d242fdaec186 ("gcc.dg/loop-8.c: Skip
for mmix.").

	gcc/
	* config/vax/vax.c (vax_address_cost): Express the cost in terms
	of COSTS_N_INSNS.
	(vax_rtx_costs): Likewise.

	gcc/testsuite/
	* gcc.dg/loop-8.c: Exclude for `vax-*-*'.
	* gcc.target/vax/compare-add-zero.c: New test.
	* gcc.target/vax/compare-mov-zero.c: New test.
---
 gcc/config/vax/vax.c                            | 110 ++++++++++++------------
 gcc/testsuite/gcc.dg/loop-8.c                   |   2 +-
 gcc/testsuite/gcc.target/vax/compare-add-zero.c |  27 ++++++
 gcc/testsuite/gcc.target/vax/compare-mov-zero.c |  24 ++++++
 4 files changed, 109 insertions(+), 54 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/vax/compare-add-zero.c
 create mode 100644 gcc/testsuite/gcc.target/vax/compare-mov-zero.c

-- 
2.11.0

Comments

Richard Sandiford via Gcc-patches Nov. 21, 2020, 3:48 a.m. | #1
On 11/19/20 8:34 PM, Maciej W. Rozycki wrote:
> Expression costs are required to be given in terms of COSTS_N_INSNS (n),

> which is defined to stand for the count of single fast instructions, and

> actually returns `n * 4'.  The VAX backend however instead operates on

> naked numbers, causing an anomaly for the integer const zero rtx, where

> the cost given is 4 as opposed to 1 for integers in the [1:63] range, as

> well as -1 for comparisons.  This is because the value of 0 returned by

> `vax_rtx_costs' is converted to COSTS_N_INSNS (1) in `pattern_cost':

>

>   return cost > 0 ? cost : COSTS_N_INSNS (1);

>

> Consequently, where feasible, 1 or -1 are preferred over 0 by the middle

> end causing code pessimization, e.g. rather than producing this:

>

> 	subl2 $4,%sp

> 	movl 4(%ap),%r0

> 	jgtr .L2

> 	addl2 $2,%r0

> .L2:

> 	ret

>

> or this:

>

> 	subl2 $4,%sp

> 	addl3 4(%ap),8(%ap),%r0

> 	jlss .L6

> 	addl2 $2,%r0

> .L6:

> 	ret

>

> code is produced like this:

>

> 	subl2 $4,%sp

> 	movl 4(%ap),%r0

> 	cmpl %r0,$1

> 	jgeq .L2

> 	addl2 $2,%r0

> .L2:

> 	ret

>

> or this:

>

> 	subl2 $4,%sp

> 	addl3 4(%ap),8(%ap),%r0

> 	cmpl %r0,$-1

> 	jleq .L6

> 	addl2 $2,%r0

> .L6:

> 	ret

>

> from this:

>

> int

> compare_mov (int x)

> {

>   if (x > 0)

>     return x;

>   else

>     return x + 2;

> }

>

> and this:

>

> int

> compare_add (int x, int y)

> {

>   int z;

>

>   z = x + y;

>   if (z < 0)

>     return z;

>   else

>     return z + 2;

> }

>

> respectively, which is slower and larger both at a time.

>

> Furthermore once the backend is converted to MODE_CC this anomaly makes

> it usually impossible to remove redundant comparisons in the comparison

> elimination pass, because most VAX instructions set the condition codes

> as per the relation of the instruction's result to 0 and not -1.

>

> The middle end has some other assumptions as to rtx costs being given in

> terms of COSTS_N_INSNS, so wrap all the VAX rtx costs then as they stand

> into COSTS_N_INSNS invocations, effectively scaling the costs by 4 while

> preserving their relative values, except for the integer const zero rtx

> given the value of `COSTS_N_INSNS (1) / 2', half of a fast instruction

> (this can be further halved if needed in the future).

>

> Adjust address costs likewise so that they remain proportional to the

> new absolute values of rtx costs.

>

> Code size stats are as follows, collected from 17639 executables built

> in `check-c' GCC testing:

>

>               samples average  median

> --------------------------------------

> regressions      1420  0.400%  0.195%

> unchanged       13811  0.000%  0.000%

> progressions     2408 -0.504% -0.201%

> --------------------------------------

> total           17639 -0.037%  0.000%

>

> with a small number of outliers only (over 5% size change):

>

> old     new     change  %change filename

> ----------------------------------------------------

> 4991    5249     258     5.1693 981001-1.exe

> 2637    2777     140     5.3090 interchange-6.exe

> 2187    2307     120     5.4869 sprintf.x7

> 3969    4197     228     5.7445 pr28982a.exe

> 8264    8816     552     6.6795 vector-compare-1.exe

> 5199    5575     376     7.2321 pr28982b.exe

> 2113    2411     298    14.1031 20030323-1.exe

> 2113    2411     298    14.1031 20030323-1.exe

> 2113    2411     298    14.1031 20030323-1.exe

>

> so it seems we are looking good, and we have complementing reductions

> to compensate:

>

> old     new     change  %change filename

> ----------------------------------------------------

> 2919    2631    -288    -9.8663 pr57521.exe

> 3427    3167    -260    -7.5868 sabd_1.exe

> 2985    2765    -220    -7.3701 ssad-run.exe

> 2985    2765    -220    -7.3701 ssad-run.exe

> 2985    2765    -220    -7.3701 usad-run.exe

> 2985    2765    -220    -7.3701 usad-run.exe

> 4509    4253    -256    -5.6775 vshuf-v2sf.exe

> 4541    4285    -256    -5.6375 vshuf-v2si.exe

> 4673    4417    -256    -5.4782 vshuf-v2df.exe

> 2993    2841    -152    -5.0785 abs-2.x4

> 2993    2841    -152    -5.0785 abs-3.x4

>

> This actually causes `loop-8.c' to regress:

>

> FAIL: gcc.dg/loop-8.c scan-rtl-dump-times loop2_invariant "Decided" 1

> FAIL: gcc.dg/loop-8.c scan-rtl-dump-not loop2_invariant "without introducing a new temporary register"

>

> but upon a closer inspection this is a red herring.  Old code looks as

> follows:

>

> 	.file	"loop-8.c"

> 	.text

> 	.align 1

> .globl f

> 	.type	f, @function

> f:

> 	.word 0

> 	subl2 $4,%sp

> 	movl 4(%ap),%r2

> 	movl 8(%ap),%r3

> 	movl $42,(%r2)

> 	clrl %r0

> 	movl $42,%r1

> 	movl %r1,%r4

> 	jbr .L2

> .L5:

> 	movl %r4,%r1

> .L2:

> 	movl %r1,(%r3)[%r0]

> 	incl %r0

> 	cmpl %r0,$100

> 	jeql .L6

> 	movl $42,(%r2)[%r0]

> 	bicl3 $-2,%r0,%r1

> 	jeql .L5

> 	movl %r0,%r1

> 	jbr .L2

> .L6:

> 	ret

> 	.size	f, .-f

>

> while new one is like below:

>

> 	.file	"loop-8.c"

> 	.text

> 	.align 1

> .globl f

> 	.type	f, @function

> f:

> 	.word 0

> 	subl2 $4,%sp

> 	movl 4(%ap),%r2

> 	movl $42,(%r2)+

> 	movl 8(%ap),%r1

> 	clrl %r0

> 	movl $42,%r3

> 	movzbl $100,%r4

> 	movl %r3,%r5

> 	jbr .L2

> .L5:

> 	movl %r5,%r3

> .L2:

> 	movl %r3,(%r1)+

> 	incl %r0

> 	cmpl %r0,%r4

> 	jeql .L6

> 	movl $42,(%r2)+

> 	bicl3 $-2,%r0,%r3

> 	jeql .L5

> 	movl %r0,%r3

> 	jbr .L2

> .L6:

> 	ret

> 	.size	f, .-f

>

> and is clearly better: not only it is smaller, but it also uses the

> post-increment rather than indexed addressing mode in the loop, of

> which the former comes for free in terms of both performance and code

> size while the latter causes an extra byte per operand to be produced

> for the index register and also incurs an execution penalty for the

> extra address calculation.

>

> Exclude the case from VAX testing then, as already done for some other

> targets and discussed with commit d242fdaec186 ("gcc.dg/loop-8.c: Skip

> for mmix.").

>

> 	gcc/

> 	* config/vax/vax.c (vax_address_cost): Express the cost in terms

> 	of COSTS_N_INSNS.

> 	(vax_rtx_costs): Likewise.

>

> 	gcc/testsuite/

> 	* gcc.dg/loop-8.c: Exclude for `vax-*-*'.

> 	* gcc.target/vax/compare-add-zero.c: New test.

> 	* gcc.target/vax/compare-mov-zero.c: New test.

So this brings a much higher degree of consistency to the vax costing
model, which is definitely a good thing.   While expressing in terms of
COSTS_N_INSNS may not always be correct, this patch does use
COSTS_N_INSNS in many cases where it was probably missing before.   If
we feel the need to refine it further, that can certainly be done.

Note that even in the cases where it may not be correct, it's not
terrible.  Consider POST_INC/PRE_DEC.  I have a hard time seeing how
those would cost 2 fast instructions most of the time.  However, if we
have to reload the expression, then COSTS_N_INSNS (2) is probably
reasonably accurate.  And I suspect the vax port is a lot more likely to
rely on reloading to match constraints than it is on using tight
predicates where constraints are more for driving instruction selection
than reloading (as we see on riscy targets).

Ultimately the proof is in the data.  Obviously someone looking to
improve things further can do so and this patch makes the bar higher for
any such changes.

OK.

Jeff

Patch

diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c
index 37f5dadc74c..b6c2210ca6b 100644
--- a/gcc/config/vax/vax.c
+++ b/gcc/config/vax/vax.c
@@ -748,7 +748,7 @@  vax_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
 		  addr_space_t as ATTRIBUTE_UNUSED,
 		  bool speed ATTRIBUTE_UNUSED)
 {
-  return (1 + (REG_P (x) ? 0 : vax_address_cost_1 (x)));
+  return COSTS_N_INSNS (1 + (REG_P (x) ? 0 : vax_address_cost_1 (x)));
 }
 
 /* Cost of an expression on a VAX.  This version has costs tuned for the
@@ -778,12 +778,13 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
     case CONST_INT:
       if (INTVAL (x) == 0)
 	{
-	  *total = 0;
+	  *total = COSTS_N_INSNS (1) / 2;
 	  return true;
 	}
       if (outer_code == AND)
 	{
-	  *total = ((unsigned HOST_WIDE_INT) ~INTVAL (x) <= 077) ? 1 : 2;
+	  *total = ((unsigned HOST_WIDE_INT) ~INTVAL (x) <= 077
+		    ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2));
 	  return true;
 	}
       if ((unsigned HOST_WIDE_INT) INTVAL (x) <= 077
@@ -792,7 +793,7 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	  || ((outer_code == PLUS || outer_code == MINUS)
 	      && (unsigned HOST_WIDE_INT) -INTVAL (x) <= 077))
 	{
-	  *total = 1;
+	  *total = COSTS_N_INSNS (1);
 	  return true;
 	}
       /* FALLTHRU */
@@ -800,48 +801,48 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
     case CONST:
     case LABEL_REF:
     case SYMBOL_REF:
-      *total = 3;
+      *total = COSTS_N_INSNS (3);
       return true;
 
     case CONST_DOUBLE:
       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	*total = vax_float_literal (x) ? 5 : 8;
+	*total = vax_float_literal (x) ? COSTS_N_INSNS (5) : COSTS_N_INSNS (8);
       else
 	*total = ((CONST_DOUBLE_HIGH (x) == 0
 		   && (unsigned HOST_WIDE_INT) CONST_DOUBLE_LOW (x) < 64)
 		  || (outer_code == PLUS
 		      && CONST_DOUBLE_HIGH (x) == -1
-		      && (unsigned HOST_WIDE_INT)-CONST_DOUBLE_LOW (x) < 64))
-		 ? 2 : 5;
+		      && (unsigned HOST_WIDE_INT)-CONST_DOUBLE_LOW (x) < 64)
+		  ? COSTS_N_INSNS (2) : COSTS_N_INSNS (5));
       return true;
 
     case POST_INC:
-      *total = 2;
-      return true;		/* Implies register operand.  */
+      *total = COSTS_N_INSNS (2);
+      return true;			/* Implies register operand.  */
 
     case PRE_DEC:
-      *total = 3;
-      return true;		/* Implies register operand.  */
+      *total = COSTS_N_INSNS (3);
+      return true;			/* Implies register operand.  */
 
     case MULT:
       switch (mode)
 	{
 	case E_DFmode:
-	  *total = 16;		/* 4 on VAX 9000 */
+	  *total = COSTS_N_INSNS (16);	/* 4 on VAX 9000 */
 	  break;
 	case E_SFmode:
-	  *total = 9;		/* 4 on VAX 9000, 12 on VAX 2 */
+	  *total = COSTS_N_INSNS (9);	/* 4 on VAX 9000, 12 on VAX 2 */
 	  break;
 	case E_DImode:
-	  *total = 16;		/* 6 on VAX 9000, 28 on VAX 2 */
+	  *total = COSTS_N_INSNS (16);	/* 6 on VAX 9000, 28 on VAX 2 */
 	  break;
 	case E_SImode:
 	case E_HImode:
 	case E_QImode:
-	  *total = 10;		/* 3-4 on VAX 9000, 20-28 on VAX 2 */
+	  *total = COSTS_N_INSNS (10);	/* 3-4 on VAX 9000, 20-28 on VAX 2 */
 	  break;
 	default:
-	  *total = MAX_COST;	/* Mode is not supported.  */
+	  *total = MAX_COST;		/* Mode is not supported.  */
 	  return true;
 	}
       break;
@@ -849,63 +850,65 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
     case UDIV:
       if (mode != SImode)
 	{
-	  *total = MAX_COST;	/* Mode is not supported.  */
+	  *total = MAX_COST;		/* Mode is not supported.  */
 	  return true;
 	}
-      *total = 17;
+      *total = COSTS_N_INSNS (17);
       break;
 
     case DIV:
       if (mode == DImode)
-	*total = 30;		/* Highly variable.  */
+	*total = COSTS_N_INSNS (30);	/* Highly variable.  */
       else if (mode == DFmode)
 	/* divide takes 28 cycles if the result is not zero, 13 otherwise */
-	*total = 24;
+	*total = COSTS_N_INSNS (24);
       else
-	*total = 11;		/* 25 on VAX 2 */
+	*total = COSTS_N_INSNS (11);	/* 25 on VAX 2 */
       break;
 
     case MOD:
-      *total = 23;
+      *total = COSTS_N_INSNS (23);
       break;
 
     case UMOD:
       if (mode != SImode)
 	{
-	  *total = MAX_COST;	/* Mode is not supported.  */
+	  *total = MAX_COST;		/* Mode is not supported.  */
 	  return true;
 	}
-      *total = 29;
+      *total = COSTS_N_INSNS (29);
       break;
 
     case FLOAT:
-      *total = (6		/* 4 on VAX 9000 */
-		+ (mode == DFmode) + (GET_MODE (XEXP (x, 0)) != SImode));
+      *total = COSTS_N_INSNS (6		/* 4 on VAX 9000 */
+			      + (mode == DFmode)
+			      + (GET_MODE (XEXP (x, 0)) != SImode));
       break;
 
     case FIX:
-      *total = 7;		/* 17 on VAX 2 */
+      *total = COSTS_N_INSNS (7);	/* 17 on VAX 2 */
       break;
 
     case ASHIFT:
     case LSHIFTRT:
     case ASHIFTRT:
       if (mode == DImode)
-	*total = 12;
+	*total = COSTS_N_INSNS (12);
       else
-	*total = 10;		/* 6 on VAX 9000 */
+	*total = COSTS_N_INSNS (10);	/* 6 on VAX 9000 */
       break;
 
     case ROTATE:
     case ROTATERT:
-      *total = 6;		/* 5 on VAX 2, 4 on VAX 9000 */
+      *total = COSTS_N_INSNS (6);	/* 5 on VAX 2, 4 on VAX 9000 */
       if (CONST_INT_P (XEXP (x, 1)))
 	fmt = "e"; 		/* all constant rotate counts are short */
       break;
 
     case PLUS:
     case MINUS:
-      *total = (mode == DFmode) ? 13 : 8; /* 6/8 on VAX 9000, 16/15 on VAX 2 */
+      *total = (mode == DFmode		/* 6/8 on VAX 9000, 16/15 on VAX 2 */
+		? COSTS_N_INSNS (13) : COSTS_N_INSNS (8));
       /* Small integer operands can use subl2 and addl2.  */
       if ((CONST_INT_P (XEXP (x, 1)))
 	  && (unsigned HOST_WIDE_INT)(INTVAL (XEXP (x, 1)) + 63) < 127)
@@ -914,16 +917,16 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
 
     case IOR:
     case XOR:
-      *total = 3;
+      *total = COSTS_N_INSNS (3);
       break;
 
     case AND:
       /* AND is special because the first operand is complemented.  */
-      *total = 3;
+      *total = COSTS_N_INSNS (3);
       if (CONST_INT_P (XEXP (x, 0)))
 	{
 	  if ((unsigned HOST_WIDE_INT)~INTVAL (XEXP (x, 0)) > 63)
-	    *total = 4;
+	    *total = COSTS_N_INSNS (4);
 	  fmt = "e";
 	  i = 1;
 	}
@@ -931,38 +934,38 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
 
     case NEG:
       if (mode == DFmode)
-	*total = 9;
+	*total = COSTS_N_INSNS (9);
       else if (mode == SFmode)
-	*total = 6;
+	*total = COSTS_N_INSNS (6);
       else if (mode == DImode)
-	*total = 4;
+	*total = COSTS_N_INSNS (4);
       else
-	*total = 2;
+	*total = COSTS_N_INSNS (2);
       break;
 
     case NOT:
-      *total = 2;
+      *total = COSTS_N_INSNS (2);
       break;
 
     case ZERO_EXTRACT:
     case SIGN_EXTRACT:
-      *total = 15;
+      *total = COSTS_N_INSNS (15);
       break;
 
     case MEM:
       if (mode == DImode || mode == DFmode)
-	*total = 5;		/* 7 on VAX 2 */
+	*total = COSTS_N_INSNS (5);	/* 7 on VAX 2 */
       else
-	*total = 3;		/* 4 on VAX 2 */
+	*total = COSTS_N_INSNS (3);	/* 4 on VAX 2 */
       x = XEXP (x, 0);
       if (!REG_P (x) && GET_CODE (x) != POST_INC)
-	*total += vax_address_cost_1 (x);
+	*total += COSTS_N_INSNS (vax_address_cost_1 (x));
       return true;
 
     case FLOAT_EXTEND:
     case FLOAT_TRUNCATE:
     case TRUNCATE:
-      *total = 3;		/* FIXME: Costs need to be checked  */
+      *total = COSTS_N_INSNS (3);	/* FIXME: Costs need to be checked  */
       break;
 
     default:
@@ -993,12 +996,12 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	case CONST_INT:
 	  if ((unsigned HOST_WIDE_INT)INTVAL (op) > 63
 	      && mode != QImode)
-	    *total += 1;	/* 2 on VAX 2 */
+	    *total += COSTS_N_INSNS (1);	/* 2 on VAX 2 */
 	  break;
 	case CONST:
 	case LABEL_REF:
 	case SYMBOL_REF:
-	  *total += 1;		/* 2 on VAX 2 */
+	  *total += COSTS_N_INSNS (1);		/* 2 on VAX 2 */
 	  break;
 	case CONST_DOUBLE:
 	  if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT)
@@ -1006,27 +1009,28 @@  vax_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	      /* Registers are faster than floating point constants -- even
 		 those constants which can be encoded in a single byte.  */
 	      if (vax_float_literal (op))
-		*total += 1;
+		*total += COSTS_N_INSNS (1);
 	      else
-		*total += (GET_MODE (x) == DFmode) ? 3 : 2;
+		*total += (GET_MODE (x) == DFmode
+			   ? COSTS_N_INSNS (3) : COSTS_N_INSNS (2));
 	    }
 	  else
 	    {
 	      if (CONST_DOUBLE_HIGH (op) != 0
 		  || (unsigned HOST_WIDE_INT)CONST_DOUBLE_LOW (op) > 63)
-		*total += 2;
+		*total += COSTS_N_INSNS (2);
 	    }
 	  break;
 	case MEM:
-	  *total += 1;		/* 2 on VAX 2 */
+	  *total += COSTS_N_INSNS (1);		/* 2 on VAX 2 */
 	  if (!REG_P (XEXP (op, 0)))
-	    *total += vax_address_cost_1 (XEXP (op, 0));
+	    *total += COSTS_N_INSNS (vax_address_cost_1 (XEXP (op, 0)));
 	  break;
 	case REG:
 	case SUBREG:
 	  break;
 	default:
-	  *total += 1;
+	  *total += COSTS_N_INSNS (1);
 	  break;
 	}
     }
diff --git a/gcc/testsuite/gcc.dg/loop-8.c b/gcc/testsuite/gcc.dg/loop-8.c
index af317d80a29..90ea1c45524 100644
--- a/gcc/testsuite/gcc.dg/loop-8.c
+++ b/gcc/testsuite/gcc.dg/loop-8.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
-/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-* mmix-*-*" } } */
+/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-* mmix-*-* vax-*-*" } } */
 /* Load immediate on condition is available from z13 on and prevents moving
    the load out of the loop, so always run this test with -march=zEC12 that
    does not have load immediate on condition.  */
diff --git a/gcc/testsuite/gcc.target/vax/compare-add-zero.c b/gcc/testsuite/gcc.target/vax/compare-add-zero.c
new file mode 100644
index 00000000000..97d4c535c73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/vax/compare-add-zero.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+int
+compare_add (int x, int y)
+{
+  int z;
+
+  z = x + y;
+  if (z < 0)
+    return z;
+  else
+    return z + 2;
+}
+
+/* Expect assembly like:
+
+	addl3 4(%ap),8(%ap),%r0
+	jlss .L1
+	addl2 $2,%r0
+.L1:
+
+A reverse branch may be used at some optimization levels.  */
+
+/* Make sure the comparison is made against 0 rather than -1.  */
+/* { dg-final { scan-assembler-not "\tj(gtr|leq) " } } */
+/* { dg-final { scan-assembler "\tj(geq|lss) " } } */
diff --git a/gcc/testsuite/gcc.target/vax/compare-mov-zero.c b/gcc/testsuite/gcc.target/vax/compare-mov-zero.c
new file mode 100644
index 00000000000..c802049f58e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/vax/compare-mov-zero.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+int
+compare_mov (int x)
+{
+  if (x > 0)
+    return x;
+  else
+    return x + 2;
+}
+
+/* Expect assembly like:
+
+	movl 4(%ap),%r0
+	jgtr .L2
+	addl2 $2,%r0
+.L2:
+
+A reverse branch may be used at some optimization levels.  */
+
+/* Make sure the comparison is made against 0 rather than 1.  */
+/* { dg-final { scan-assembler-not "\tj(geq|lss) " } } */
+/* { dg-final { scan-assembler "\tj(gtr|leq) " } } */