Skip to content

Commit

Permalink
Support for argument reduction.
Browse files Browse the repository at this point in the history
  • Loading branch information
pleroy committed Oct 9, 2024
1 parent be131d1 commit 32b3507
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 9 deletions.
2 changes: 1 addition & 1 deletion benchmarks/elementary_functions_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void BM_EvaluateElementaryFunction(benchmark::State& state) {
using Argument = double;

std::mt19937_64 random(42);
std::uniform_real_distribution<> uniformly_at(0, π / 4);
std::uniform_real_distribution<> uniformly_at(-2 * π, 2 * π);

Argument a[number_of_iterations];
for (std::int64_t i = 0; i < number_of_iterations; ++i) {
Expand Down
7 changes: 3 additions & 4 deletions functions/sin_cos_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ class SinCosTest : public ::testing::Test {};

TEST_F(SinCosTest, Random) {
std::mt19937_64 random(42);
// TODO(phl): Negative angles.
std::uniform_real_distribution<> uniformly_at(-π / 4, π / 4);
std::uniform_real_distribution<> uniformly_at(-2 * π, 2 * π);

cpp_bin_float_50 max_sin_ulps_error = 0;
cpp_bin_float_50 max_cos_ulps_error = 0;
Expand Down Expand Up @@ -69,8 +68,8 @@ TEST_F(SinCosTest, Random) {
}

// This implementation is not quite correctly rounded, but not far from it.
EXPECT_LE(max_sin_ulps_error, 0.500003);
EXPECT_LE(max_cos_ulps_error, 0.500001);
EXPECT_LE(max_sin_ulps_error, 0.500138);
EXPECT_LE(max_cos_ulps_error, 0.500146);

LOG(ERROR) << "Sin error: " << max_sin_ulps_error << std::setprecision(25)
<< " ulps for argument: " << worst_sin_argument
Expand Down
32 changes: 28 additions & 4 deletions numerics/sin_cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "numerics/double_precision.hpp"
#include "numerics/fma.hpp"
#include "numerics/polynomial_evaluators.hpp"
#include "quantities/elementary_functions.hpp"

namespace principia {
namespace numerics {
Expand All @@ -18,6 +19,7 @@ using namespace principia::numerics::_accurate_tables;
using namespace principia::numerics::_double_precision;
using namespace principia::numerics::_fma;
using namespace principia::numerics::_polynomial_evaluators;
using namespace principia::quantities::_elementary_functions;

using Argument = double;
using Value = double;
Expand All @@ -27,7 +29,7 @@ constexpr Argument table_spacing_reciprocal = 512.0;

// π / 2 split so that the high half has 18 zeros at the end of its mantissa.
constexpr std::int64_t π_over_2_zeroes = 18;
constexpr Argument π_over_2_threshold = 1 << π_over_2_zeroes;//NONONO
constexpr Argument π_over_2_threshold = π / 2 * (1 << π_over_2_zeroes);
constexpr Argument π_over_2_high = 0x1.921fb'5444p0;
constexpr Argument π_over_2_low = 0x2.d1846'9898c'c5170'1b839p-40;

Expand All @@ -39,6 +41,17 @@ static const __m128d sign_bit =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x8000'0000'0000'0000));
} // namespace masks

template<FMAPolicy fma_policy>
double FusedMultiplyAdd(double const a, double const b, double const c) {
if ((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) ||
(fma_policy == FMAPolicy::Auto && UseHardwareFMA)) {
using quantities::_elementary_functions::FusedMultiplyAdd;
return FusedMultiplyAdd(a, b, c);
} else {
return a * b + c;
}
}

void Reduce(Argument const x,
DoublePrecision<Argument>& x_reduced,
std::int64_t& quadrant) {
Expand Down Expand Up @@ -90,7 +103,8 @@ Value SinImplementation(DoublePrecision<Argument> const argument) {
if (abs_x < sin_near_zero_cutoff) {
double const x² = x * x;
double const x³ = x² * x;
return x + (x³ * SinPolynomialNearZero<fma_policy>(x²) + e);
return x + FusedMultiplyAdd<fma_policy>(
x³, SinPolynomialNearZero<fma_policy>(x²), e);
} else {
__m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(x));
auto const i = _mm_cvtsd_si64(_mm_set_sd(abs_x * table_spacing_reciprocal));
Expand All @@ -100,6 +114,10 @@ Value SinImplementation(DoublePrecision<Argument> const argument) {
double const sin_x₀ =
_mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(accurate_values.sin_x), sign));
double const& cos_x₀ = accurate_values.cos_x;
// [GB91] incorporates `e` in the computation of `h`. However, `e` can be
// very small and `h` can be as large as 1/512. We note that the only
// influence of `e` is on the computation of `cos_x₀ * e`, so we incorporate
// `e` with the `SinPolynomial` below.
double const h = x - x₀;
DoublePrecision<double> const sin_x₀_plus_h_cos_x₀ =
Expand All @@ -108,7 +126,8 @@ Value SinImplementation(DoublePrecision<Argument> const argument) {
double const h³ = h² * h;
return sin_x₀_plus_h_cos_x₀.value +
((sin_x₀ * h² * CosPolynomial<fma_policy>(h²) +
cos_x₀ * (h³ * SinPolynomial<fma_policy>(h²) + e)) +
cos_x₀ * FusedMultiplyAdd<fma_policy>(
h³, SinPolynomial<fma_policy>(h²), e)) +
sin_x₀_plus_h_cos_x₀.error);
}
}
Expand All @@ -127,6 +146,10 @@ Value CosImplementation(DoublePrecision<Argument> const argument) {
double const sin_x₀ =
_mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(accurate_values.sin_x), sign));
double const& cos_x₀ = accurate_values.cos_x;
// [GB91] incorporates `e` in the computation of `h`. However, `e` can be
// very small and `h` can be as large as 1/512. We note that the only
// influence of `e` is on the computation of `sin_x₀ * e`, so we incorporate
// `e` with the `SinPolynomial` below.
double const h = x - x₀;
DoublePrecision<double> const cos_x₀_minus_h_sin_x₀ =
Expand All @@ -135,7 +158,8 @@ Value CosImplementation(DoublePrecision<Argument> const argument) {
double const h³ = h² * h;
return cos_x₀_minus_h_sin_x₀.value +
((cos_x₀ * h² * CosPolynomial<fma_policy>(h²) -
sin_x₀ * (h³ * SinPolynomial<fma_policy>(h²) + e)) +
sin_x₀ * FusedMultiplyAdd<fma_policy>(
h³, SinPolynomial<fma_policy>(h²), e)) +
cos_x₀_minus_h_sin_x₀.error);
}
Expand Down

0 comments on commit 32b3507

Please sign in to comment.