[08/11] libstdc++: Avoid raising fp exceptions in trunc, floor, and ceil

Message ID 2900568.OVeUXlzvHe@excalibur
State New
Headers show
Series
  • stdx::simd optimizations, corrections, and cleanups
Related show

Commit Message

Matthias Kretz June 8, 2021, 12:11 p.m.
From: Matthias Kretz <kretz@kde.org>

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>

libstdc++-v3/ChangeLog:
	* include/experimental/bits/simd_x86.h (_S_trunc, _S_floor,
	_S_ceil): Set bit 8 (_MM_FROUND_NO_EXC) on AVX and SSE4.1
	roundp[sd] calls.
---
 .../include/experimental/bits/simd_x86.h      | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)


--
──────────────────────────────────────────────────────────────────────────
 Dr. Matthias Kretz                           https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research               https://gsi.de
 std::experimental::simd              https://github.com/VcDevel/std-simd
──────────────────────────────────────────────────────────────────────────

Patch

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 5706bf63845..34633c096b1 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -2657,13 +2657,13 @@  template <typename _Abi>
 	else if constexpr (__is_avx512_pd<_Tp, _Np>())
 	  return _mm512_roundscale_pd(__x, 0x0b);
 	else if constexpr (__is_avx_ps<_Tp, _Np>())
-	  return _mm256_round_ps(__x, 0x3);
+	  return _mm256_round_ps(__x, 0xb);
 	else if constexpr (__is_avx_pd<_Tp, _Np>())
-	  return _mm256_round_pd(__x, 0x3);
+	  return _mm256_round_pd(__x, 0xb);
 	else if constexpr (__have_sse4_1 && __is_sse_ps<_Tp, _Np>())
-	  return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0x3));
+	  return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0xb));
 	else if constexpr (__have_sse4_1 && __is_sse_pd<_Tp, _Np>())
-	  return _mm_round_pd(__x, 0x3);
+	  return _mm_round_pd(__x, 0xb);
 	else if constexpr (__is_sse_ps<_Tp, _Np>())
 	  {
 	    auto __truncated
@@ -2786,13 +2786,13 @@  template <typename _Abi>
 	else if constexpr (__is_avx512_pd<_Tp, _Np>())
 	  return _mm512_roundscale_pd(__x, 0x09);
 	else if constexpr (__is_avx_ps<_Tp, _Np>())
-	  return _mm256_round_ps(__x, 0x1);
+	  return _mm256_round_ps(__x, 0x9);
 	else if constexpr (__is_avx_pd<_Tp, _Np>())
-	  return _mm256_round_pd(__x, 0x1);
+	  return _mm256_round_pd(__x, 0x9);
 	else if constexpr (__have_sse4_1 && __is_sse_ps<_Tp, _Np>())
-	  return __auto_bitcast(_mm_floor_ps(__to_intrin(__x)));
+	  return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0x9));
 	else if constexpr (__have_sse4_1 && __is_sse_pd<_Tp, _Np>())
-	  return _mm_floor_pd(__x);
+	  return _mm_round_pd(__x, 0x9);
 	else
 	  return _Base::_S_floor(__x);
       }
@@ -2808,13 +2808,13 @@  template <typename _Abi>
 	else if constexpr (__is_avx512_pd<_Tp, _Np>())
 	  return _mm512_roundscale_pd(__x, 0x0a);
 	else if constexpr (__is_avx_ps<_Tp, _Np>())
-	  return _mm256_round_ps(__x, 0x2);
+	  return _mm256_round_ps(__x, 0xa);
 	else if constexpr (__is_avx_pd<_Tp, _Np>())
-	  return _mm256_round_pd(__x, 0x2);
+	  return _mm256_round_pd(__x, 0xa);
 	else if constexpr (__have_sse4_1 && __is_sse_ps<_Tp, _Np>())
-	  return __auto_bitcast(_mm_ceil_ps(__to_intrin(__x)));
+	  return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0xa));
 	else if constexpr (__have_sse4_1 && __is_sse_pd<_Tp, _Np>())
-	  return _mm_ceil_pd(__x);
+	  return _mm_round_pd(__x, 0xa);
 	else
 	  return _Base::_S_ceil(__x);
       }