Skip to content

Commit

Permalink
Merge pull request #4114 from pleroy/CorrectRounding
Browse files Browse the repository at this point in the history
Fall back to cr_sin and cr_cos in our implementation of Sin and Cos
  • Loading branch information
pleroy authored Oct 15, 2024
2 parents 8611d65 + c4499f1 commit 6afb36e
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 50 deletions.
60 changes: 55 additions & 5 deletions functions/sin_cos_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "gtest/gtest.h"
#include "numerics/next.hpp"
#include "quantities/numbers.hpp"
#include "testing_utilities/almost_equals.hpp"

// This test lives in `functions` to avoid pulling `boost` into `numerics`.
namespace principia {
Expand All @@ -18,6 +19,7 @@ namespace _sin_cos {

using namespace boost::multiprecision;
using namespace principia::numerics::_next;
using namespace principia::testing_utilities::_almost_equals;

class SinCosTest : public ::testing::Test {};

Expand All @@ -35,7 +37,7 @@ TEST_F(SinCosTest, Random) {
#if _DEBUG
static constexpr std::int64_t iterations = 100;
#else
static constexpr std::int64_t iterations = 300'000;
static constexpr std::int64_t iterations = 1'000'000;
#endif

for (std::int64_t i = 0; i < iterations; ++i) {
Expand All @@ -55,6 +57,7 @@ TEST_F(SinCosTest, Random) {
}
if (sin_ulps_error > 0.5) {
++incorrectly_rounded_sin;
LOG(ERROR) << "Sin: " << std::setprecision(25) << principia_argument;
}
}
{
Expand All @@ -71,15 +74,16 @@ TEST_F(SinCosTest, Random) {
}
if (cos_ulps_error > 0.5) {
++incorrectly_rounded_cos;
LOG(ERROR) << "Cos: " << std::setprecision(25) << principia_argument;
}
}
}

// This implementation is not quite correctly rounded, but not far from it.
EXPECT_LE(max_sin_ulps_error, 0.500002);
EXPECT_LE(max_cos_ulps_error, 0.500002);
EXPECT_LE(incorrectly_rounded_sin, 1);
EXPECT_LE(incorrectly_rounded_cos, 1);
EXPECT_LE(max_sin_ulps_error, 0.5);
EXPECT_LE(max_cos_ulps_error, 0.5);
EXPECT_EQ(incorrectly_rounded_sin, 0);
EXPECT_EQ(incorrectly_rounded_cos, 0);

LOG(ERROR) << "Sin error: " << max_sin_ulps_error << std::setprecision(25)
<< " ulps for argument: " << worst_sin_argument
Expand All @@ -93,6 +97,52 @@ TEST_F(SinCosTest, Random) {
<< incorrectly_rounded_cos / static_cast<double>(iterations);
}

// Values for which the base algorithm gives an error of 1 ULP.
TEST_F(SinCosTest, HardRounding) {
EXPECT_THAT(Sin(1.777288458404935767021016),
AlmostEquals(0.9787561457198967196367773, 0));
EXPECT_THAT(Cos(3.912491942337291916942377),
AlmostEquals(-0.7172843528140595004137653, 0));
EXPECT_THAT(Sin(5.528810471911395296729097),
AlmostEquals(-0.6848332450871304488693046, 0));
EXPECT_THAT(Sin(2.670333644894535396474566),
AlmostEquals(0.4540084183741445456039384, 0));
EXPECT_THAT(Cos(1.486604973422413600303571),
AlmostEquals(0.0840919279825555407437241, 0));
EXPECT_THAT(Sin(-2.496680544289484160458414),
AlmostEquals(-0.6011282027544306294509797, 0));
EXPECT_THAT(Sin(3.348980952786005715893225),
AlmostEquals(-0.2059048676737040700634683, 0));
EXPECT_THAT(Sin(3.523452575387961971387085),
AlmostEquals(-0.3726470704519433130297035, 0));
EXPECT_THAT(Cos(-6.265702600230396157598989),
AlmostEquals(0.99984718137127853720984932, 0));
EXPECT_THAT(Sin(1.881458091523454001503524),
AlmostEquals(0.9521314843257784876761001, 0));
EXPECT_THAT(Sin(-1.763163156774038675678185),
AlmostEquals(-0.9815544881044536151825223, 0));
EXPECT_THAT(Cos(-3.885819786017697730073905),
AlmostEquals(-0.7356116652133562472394118, 0));
EXPECT_THAT(Sin(-2.58105062034143273308473),
AlmostEquals(-0.5316453603071467637339815, 0));
EXPECT_THAT(Sin(1.657419885978818285821035),
AlmostEquals(0.99625052493662308306103561, 0));
EXPECT_THAT(Sin(5.094301519947547873812255),
AlmostEquals(-0.9279535374988051033005616, 0));
EXPECT_THAT(Sin(5.262137362438826571064965),
AlmostEquals(-0.8526560125576488347044409, 0));
EXPECT_THAT(Cos(-5.026994177012682030181168),
AlmostEquals(0.3094410694753661206223057, 0));
EXPECT_THAT(Cos(0.2388111698570396512764091),
AlmostEquals(0.9716198764286143041422587, 0));
}

TEST_F(SinCosTest, HardReduction) {
EXPECT_THAT(Sin(0x16ac5b262ca1ffp797), AlmostEquals(1.0, 0));
EXPECT_THAT(Cos(0x16ac5b262ca1ffp797),
AlmostEquals(-4.687165924254627611122582801963884e-19, 0));
}

} // namespace _sin_cos
} // namespace numerics
} // namespace principia
138 changes: 93 additions & 45 deletions numerics/sin_cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

#include <pmmintrin.h>

#include <limits>

#include "core-math/cos.h"
#include "core-math/sin.h"
#include "numerics/accurate_tables.mathematica.h"
#include "numerics/double_precision.hpp"
#include "numerics/fma.hpp"
Expand Down Expand Up @@ -39,6 +43,10 @@ using Polynomial1 = HornerEvaluator<Value, Argument, 1, fma_policy>;
namespace masks {
static const __m128d sign_bit =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x8000'0000'0000'0000));
static const __m128d exponent_bits =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x7ff0'0000'0000'0000));
static const __m128d mantissa_bits =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x000f'ffff'ffff'ffff));
} // namespace masks

template<FMAPolicy fma_policy>
Expand All @@ -52,29 +60,62 @@ double FusedMultiplyAdd(double const a, double const b, double const c) {
}
}

void Reduce(Argument const x,
DoublePrecision<Argument>& x_reduced,
// Evaluates the sum `x + Δx`. If that sum has a dangerous rounding
// configuration (that is, the bits after the last mantissa bit of the sum are
// either 1000... or 0111..., then returns `NaN`. Otherwise returns the sum.
double DetectDangerousRounding(double const x, double const Δx) {
DoublePrecision<double> const sum = QuickTwoSum(x, Δx);
double const& value = sum.value;
double const& error = sum.error;
__m128i const value_exponent_128i =
_mm_castpd_si128(_mm_and_pd(masks::exponent_bits, _mm_set_sd(value)));
double const value_exponent =
_mm_cvtsd_f64(_mm_castsi128_pd(value_exponent_128i));
__m128i const error_128i =
_mm_castpd_si128(_mm_andnot_pd(masks::sign_bit, _mm_set_sd(error)));
double const normalized_error = _mm_cvtsd_f64(
_mm_castsi128_pd(_mm_sub_epi64(error_128i, value_exponent_128i)));
// TODO(phl): Error analysis to refine the thresholds. Also, can we avoid
// negative numbers?
if (normalized_error < -0x1.ffffp971 && normalized_error > -0x1.00008p972) {
#if _DEBUG
LOG(ERROR) << std::setprecision(25) << x << " " << std::hexfloat << value
<< " " << error << " " << normalized_error;
#endif
return std::numeric_limits<double>::quiet_NaN();
} else {
return value;
}
}

void Reduce(Argument const θ,
DoublePrecision<Argument>& θ_reduced,
std::int64_t& quadrant) {
if (x < π / 4 && x > -π / 4) {
x_reduced.value = x;
x_reduced.error = 0;
if (θ < π / 4 && θ > -π / 4) {
θ_reduced.value = θ;
θ_reduced.error = 0;
quadrant = 0;
} else if (x <= π_over_2_threshold && x >= -π_over_2_threshold) {
return;
} else if (θ <= π_over_2_threshold && θ >= -π_over_2_threshold) {
// We are not very sensitive to rounding errors in this expression, because
// in the worst case it could cause the reduced angle to jump from the
// vicinity of π / 4 to the vicinity of -π / 4 with appropriate adjustment
// of the quadrant.
__m128d const n_128d = _mm_round_sd(
_mm_setzero_pd(), _mm_set_sd(x * (2 / π)), _MM_FROUND_RINT);
_mm_setzero_pd(), _mm_set_sd(θ * (2 / π)), _MM_FROUND_RINT);
double const n_double = _mm_cvtsd_f64(n_128d);
std::int64_t const n = _mm_cvtsd_si64(n_128d);
Argument const value = x - n_double * π_over_2_high;
Argument const value = θ - n_double * π_over_2_high;
Argument const error = n_double * π_over_2_low;
x_reduced = QuickTwoDifference(value, error);
// TODO(phl): Check for value too small.
quadrant = n & 0b11;
θ_reduced = QuickTwoDifference(value, error);
// TODO(phl): Error analysis needed to find the right bounds.
if (θ_reduced.value < -0x1.0p-30 || θ_reduced.value > 0x1.0p-30) {
quadrant = n & 0b11;
return;
}
}
// TODO(phl): Fallback to a more precise algorithm.
θ_reduced.value = 0;
θ_reduced.error = std::numeric_limits<double>::quiet_NaN();
}

// TODO(phl): Take the perturbation into account in the polynomials.
Expand Down Expand Up @@ -102,15 +143,16 @@ Value CosPolynomial(Argument const x) {
template<FMAPolicy fma_policy>
FORCE_INLINE(inline)
Value SinImplementation(DoublePrecision<Argument> const argument) {
auto const& x = argument.value;
auto const& e = argument.error;
Value SinImplementation(DoublePrecision<Argument> const θ_reduced) {
auto const& x = θ_reduced.value;
auto const& e = θ_reduced.error;
double const abs_x = std::abs(x);
if (abs_x < sin_near_zero_cutoff) {
double const x² = x * x;
double const x³ = x² * x;
return x + FusedMultiplyAdd<fma_policy>(
double const x³_term = FusedMultiplyAdd<fma_policy>(
x³, SinPolynomialNearZero<fma_policy>(x²), e);
return DetectDangerousRounding(x, x³_term);
} else {
__m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(x));
auto const i = _mm_cvtsd_si64(_mm_set_sd(abs_x * table_spacing_reciprocal));
Expand All @@ -130,19 +172,20 @@ Value SinImplementation(DoublePrecision<Argument> const argument) {
TwoProductAdd<fma_policy>(cos_x₀, h, sin_x₀);
double const h² = h * (h + 2 * e);
double const h³ = h² * h;
return sin_x₀_plus_h_cos_x₀.value +
((sin_x₀ * h² * CosPolynomial<fma_policy>(h²) +
cos_x₀ * FusedMultiplyAdd<fma_policy>(
h³, SinPolynomial<fma_policy>(h²), e)) +
sin_x₀_plus_h_cos_x₀.error);
double const polynomial_term =
((sin_x₀ * h² * CosPolynomial<fma_policy>(h²) +
cos_x₀ * FusedMultiplyAdd<fma_policy>(
h³, SinPolynomial<fma_policy>(h²), e)) +
sin_x₀_plus_h_cos_x₀.error);
return DetectDangerousRounding(sin_x₀_plus_h_cos_x₀.value, polynomial_term);
}
}

template<FMAPolicy fma_policy>
FORCE_INLINE(inline)
Value CosImplementation(DoublePrecision<Argument> const argument) {
auto const& x = argument.value;
auto const& e = argument.error;
Value CosImplementation(DoublePrecision<Argument> const θ_reduced) {
auto const& x = θ_reduced.value;
auto const& e = θ_reduced.error;
double const abs_x = std::abs(x);
__m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(x));
auto const i = _mm_cvtsd_si64(_mm_set_sd(abs_x * table_spacing_reciprocal));
Expand All @@ -162,35 +205,38 @@ Value CosImplementation(DoublePrecision<Argument> const argument) {
TwoProductNegatedAdd<fma_policy>(sin_x₀, h, cos_x₀);
double const h² = h * (h + 2 * e);
double const h³ = h² * h;
return cos_x₀_minus_h_sin_x₀.value +
((cos_x₀ * h² * CosPolynomial<fma_policy>(h²) -
sin_x₀ * FusedMultiplyAdd<fma_policy>(
h³, SinPolynomial<fma_policy>(h²), e)) +
cos_x₀_minus_h_sin_x₀.error);
double const polynomial_term =
((cos_x₀ * h² * CosPolynomial<fma_policy>(h²) -
sin_x₀ * FusedMultiplyAdd<fma_policy>(
h³, SinPolynomial<fma_policy>(h²), e)) +
cos_x₀_minus_h_sin_x₀.error);
return DetectDangerousRounding(cos_x₀_minus_h_sin_x₀.value, polynomial_term);
}

#if PRINCIPIA_INLINE_SIN_COS
inline
#endif
Value __cdecl Sin(Argument const x) {
DoublePrecision<Argument> x_reduced;
Value __cdecl Sin(Argument const θ) {
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
Reduce(x, x_reduced, quadrant);
Reduce(θ, θ_reduced, quadrant);
double value;
if (UseHardwareFMA) {
if (quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Force>(x_reduced);
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
} else {
value = SinImplementation<FMAPolicy::Force>(x_reduced);
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
}
} else {
if (quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Disallow>(x_reduced);
value = CosImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
value = SinImplementation<FMAPolicy::Disallow>(x_reduced);
value = SinImplementation<FMAPolicy::Disallow>(θ_reduced);
}
}
if (quadrant & 0b10) {
if (value != value) {
return cr_sin(θ);
} else if (quadrant & 0b10) {
return -value;
} else {
return value;
Expand All @@ -200,25 +246,27 @@ Value __cdecl Sin(Argument const x) {
#if PRINCIPIA_INLINE_SIN_COS
inline
#endif
Value __cdecl Cos(Argument const x) {
DoublePrecision<Argument> x_reduced;
Value __cdecl Cos(Argument const θ) {
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
Reduce(x, x_reduced, quadrant);
Reduce(θ, θ_reduced, quadrant);
double value;
if (UseHardwareFMA) {
if (quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Force>(x_reduced);
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
} else {
value = CosImplementation<FMAPolicy::Force>(x_reduced);
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
}
} else {
if (quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Disallow>(x_reduced);
value = SinImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
value = CosImplementation<FMAPolicy::Disallow>(x_reduced);
value = CosImplementation<FMAPolicy::Disallow>(θ_reduced);
}
}
if (quadrant == 1 || quadrant == 2) {
if (value != value) {
return cr_cos(θ);
} else if (quadrant == 1 || quadrant == 2) {
return -value;
} else {
return value;
Expand Down

0 comments on commit 6afb36e

Please sign in to comment.