From 32b35070f5d78c251999de36990603fd21c0943c Mon Sep 17 00:00:00 2001 From: pleroy Date: Wed, 9 Oct 2024 22:16:58 +0200 Subject: [PATCH] Support for argument reduction. --- benchmarks/elementary_functions_benchmark.cpp | 2 +- functions/sin_cos_test.cpp | 7 ++-- numerics/sin_cos.cpp | 32 ++++++++++++++++--- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/benchmarks/elementary_functions_benchmark.cpp b/benchmarks/elementary_functions_benchmark.cpp index 83af680d80..58e5f31b45 100644 --- a/benchmarks/elementary_functions_benchmark.cpp +++ b/benchmarks/elementary_functions_benchmark.cpp @@ -28,7 +28,7 @@ void BM_EvaluateElementaryFunction(benchmark::State& state) { using Argument = double; std::mt19937_64 random(42); - std::uniform_real_distribution<> uniformly_at(0, π / 4); + std::uniform_real_distribution<> uniformly_at(-2 * π, 2 * π); Argument a[number_of_iterations]; for (std::int64_t i = 0; i < number_of_iterations; ++i) { diff --git a/functions/sin_cos_test.cpp b/functions/sin_cos_test.cpp index de665b45c0..25a650a924 100644 --- a/functions/sin_cos_test.cpp +++ b/functions/sin_cos_test.cpp @@ -23,8 +23,7 @@ class SinCosTest : public ::testing::Test {}; TEST_F(SinCosTest, Random) { std::mt19937_64 random(42); - // TODO(phl): Negative angles. - std::uniform_real_distribution<> uniformly_at(-π / 4, π / 4); + std::uniform_real_distribution<> uniformly_at(-2 * π, 2 * π); cpp_bin_float_50 max_sin_ulps_error = 0; cpp_bin_float_50 max_cos_ulps_error = 0; @@ -69,8 +68,8 @@ TEST_F(SinCosTest, Random) { } // This implementation is not quite correctly rounded, but not far from it. - EXPECT_LE(max_sin_ulps_error, 0.500003); - EXPECT_LE(max_cos_ulps_error, 0.500001); + EXPECT_LE(max_sin_ulps_error, 0.500138); + EXPECT_LE(max_cos_ulps_error, 0.500146); LOG(ERROR) << "Sin error: " << max_sin_ulps_error << std::setprecision(25) << " ulps for argument: " << worst_sin_argument diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 0b2c20cc83..88e59ac220 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -8,6 +8,7 @@ #include "numerics/double_precision.hpp" #include "numerics/fma.hpp" #include "numerics/polynomial_evaluators.hpp" +#include "quantities/elementary_functions.hpp" namespace principia { namespace numerics { @@ -18,6 +19,7 @@ using namespace principia::numerics::_accurate_tables; using namespace principia::numerics::_double_precision; using namespace principia::numerics::_fma; using namespace principia::numerics::_polynomial_evaluators; +using namespace principia::quantities::_elementary_functions; using Argument = double; using Value = double; @@ -27,7 +29,7 @@ constexpr Argument table_spacing_reciprocal = 512.0; // π / 2 split so that the high half has 18 zeros at the end of its mantissa. constexpr std::int64_t π_over_2_zeroes = 18; -constexpr Argument π_over_2_threshold = 1 << π_over_2_zeroes;//NONONO +constexpr Argument π_over_2_threshold = π / 2 * (1 << π_over_2_zeroes); constexpr Argument π_over_2_high = 0x1.921fb'5444p0; constexpr Argument π_over_2_low = 0x2.d1846'9898c'c5170'1b839p-40; @@ -39,6 +41,17 @@ static const __m128d sign_bit = _mm_castsi128_pd(_mm_cvtsi64_si128(0x8000'0000'0000'0000)); } // namespace masks +template +double FusedMultiplyAdd(double const a, double const b, double const c) { + if ((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) || + (fma_policy == FMAPolicy::Auto && UseHardwareFMA)) { + using quantities::_elementary_functions::FusedMultiplyAdd; + return FusedMultiplyAdd(a, b, c); + } else { + return a * b + c; + } +} + void Reduce(Argument const x, DoublePrecision& x_reduced, std::int64_t& quadrant) { @@ -90,7 +103,8 @@ Value SinImplementation(DoublePrecision const argument) { if (abs_x < sin_near_zero_cutoff) { double const x² = x * x; double const x³ = x² * x; - return x + (x³ * SinPolynomialNearZero(x²) + e); + return x + FusedMultiplyAdd( + x³, SinPolynomialNearZero(x²), e); } else { __m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(x)); auto const i = _mm_cvtsd_si64(_mm_set_sd(abs_x * table_spacing_reciprocal)); @@ -100,6 +114,10 @@ Value SinImplementation(DoublePrecision const argument) { double const sin_x₀ = _mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(accurate_values.sin_x), sign)); double const& cos_x₀ = accurate_values.cos_x; + // [GB91] incorporates `e` in the computation of `h`. However, `e` can be + // very small and `h` can be as large as 1/512. We note that the only + // influence of `e` is on the computation of `cos_x₀ * e`, so we incorporate + // `e` with the `SinPolynomial` below. double const h = x - x₀; DoublePrecision const sin_x₀_plus_h_cos_x₀ = @@ -108,7 +126,8 @@ Value SinImplementation(DoublePrecision const argument) { double const h³ = h² * h; return sin_x₀_plus_h_cos_x₀.value + ((sin_x₀ * h² * CosPolynomial(h²) + - cos_x₀ * (h³ * SinPolynomial(h²) + e)) + + cos_x₀ * FusedMultiplyAdd( + h³, SinPolynomial(h²), e)) + sin_x₀_plus_h_cos_x₀.error); } } @@ -127,6 +146,10 @@ Value CosImplementation(DoublePrecision const argument) { double const sin_x₀ = _mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(accurate_values.sin_x), sign)); double const& cos_x₀ = accurate_values.cos_x; + // [GB91] incorporates `e` in the computation of `h`. However, `e` can be + // very small and `h` can be as large as 1/512. We note that the only + // influence of `e` is on the computation of `sin_x₀ * e`, so we incorporate + // `e` with the `SinPolynomial` below. double const h = x - x₀; DoublePrecision const cos_x₀_minus_h_sin_x₀ = @@ -135,7 +158,8 @@ Value CosImplementation(DoublePrecision const argument) { double const h³ = h² * h; return cos_x₀_minus_h_sin_x₀.value + ((cos_x₀ * h² * CosPolynomial(h²) - - sin_x₀ * (h³ * SinPolynomial(h²) + e)) + + sin_x₀ * FusedMultiplyAdd( + h³, SinPolynomial(h²), e)) + cos_x₀_minus_h_sin_x₀.error); }