Skip to content

Commit

Permalink
NUMBERS-204: Update DoubleSplitPerformance with the sub-normal upscaling
Browse files Browse the repository at this point in the history
  • Loading branch information
aherbert committed Aug 31, 2023
1 parent c845758 commit fdcef0e
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ final class DoublePrecision {
* Assuming the multiplier is above 2^27 and the maximum exponent is 1023 then a safe
* limit is a value with an exponent of (1023 - 27) = 2^996. */
private static final double SAFE_UPPER = 0x1.0p996;
/** The lower limit for a product {@code x * y} below which the round-off component may be
* sub-normal. This is set as 2^-1022 * 2^54. */
private static final double SAFE_LOWER = 0x1.0p-968;

/** The scale to use when down-scaling during a split into a high part.
* This must be smaller than the inverse of the multiplier and a power of 2 for exact scaling. */
Expand All @@ -69,6 +72,13 @@ final class DoublePrecision {
* This is the inverse of {@link #DOWN_SCALE}. */
private static final double UP_SCALE = 0x1.0p30;

/** The upscale factor squared. */
private static final double UP_SCALE2 = 0x1.0p60;
/** The downscale factor squared. */
private static final double DOWN_SCALE2 = 0x1.0p-60;
/** The safe upper limit so the product {@code x * y} can be upscaled by 2^60. */
private static final double SAFE_UPPER_S = 0x1.0p963;

/** The mask to zero the lower 27-bits of a long . */
private static final long ZERO_LOWER_27_BITS = 0xffff_ffff_f800_0000L;

Expand Down Expand Up @@ -402,6 +412,158 @@ static double productLow(double x, double y, double xy) {
// This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
// but is included here to have a single low probability branch condition.

// Add the absolute inputs for a single comparison. The sum will not be more than
// 3-fold higher than any component.
final double a = Math.abs(x);
final double b = Math.abs(y);
final double ab = Math.abs(xy);
if (a + b + ab >= SAFE_UPPER) {
// Only required to scale the largest number as x*y does not overflow.
if (a > b) {
return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE;
}
return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE;
}

// The result is computed using a product of the low parts.
// To avoid underflow in the low parts we note that these are approximately a factor
// of 2^27 smaller than the original inputs so their product will be ~2^54 smaller
// than the product xy. Ensure the product is at least 2^54 above a sub-normal.
if (ab <= SAFE_LOWER) {
// Scaling up here is safe: the largest magnitude cannot be above SAFE_LOWER / MIN_VALUE.
return productLowUnscaled(x * UP_SCALE, y * UP_SCALE, xy * UP_SCALE2) * DOWN_SCALE2;
}

// No scaling required
return productLowUnscaled(x, y, xy);
}


/**
* Compute the low part of the double length number {@code (z,zz)} for the exact
* product of {@code x} and {@code y}. This is equivalent to computing a {@code double}
* containing the magnitude of the rounding error when converting the exact 106-bit
* significand of the multiplication result to a 53-bit significand.
*
* <p>The method is written to be functionally similar to using a fused multiply add (FMA)
* operation to compute the low part, for example JDK 9's Math.fma function (note the sign
* change in the input argument for the product):
* <pre>
* double x = ...;
* double y = ...;
* double xy = x * y;
* double low1 = Math.fma(x, y, -xy);
* double low2 = DoublePrecision.productLow(x, y, xy);
* </pre>
*
* <p>Special cases:
*
* <ul>
* <li>If {@code x * y} is sub-normal or zero then the result is 0.0.
* <li>If {@code x * y} is infinite or NaN then the result is NaN.
* </ul>
*
* @param x First factor.
* @param y Second factor.
* @param xy Product of the factors (x * y).
* @return the low part of the product double length number
* @see #highPart(double)
* @see #productLow(double, double, double, double, double)
*/
static double productLowS(double x, double y, double xy) {
// Verify the input. This must be NaN safe.
//assert Double.compare(x * y, xy) == 0

// If the number is sub-normal, inf or nan there is no round-off.
if (isNotNormal(xy)) {
// Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan:
return xy - xy;
}

// The result xy is finite and normal.
// Use Dekker's mul12 algorithm that splits the values into high and low parts.
// Dekker's split using multiplication will overflow if the value is within 2^27
// of double max value. It can also produce 26-bit approximations that are larger
// than the input numbers for the high part causing overflow in hx * hy when
// x * y does not overflow. So we must scale down big numbers.
// We only have to scale the largest number as we know the product does not overflow
// (if one is too big then the other cannot be).
// We also scale if the product is close to overflow to avoid intermediate overflow.
// This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
// but is included here to have a single low probability branch condition.

// Add the absolute inputs for a single comparison. The sum will not be more than
// 3-fold higher than any component.

// Note: To drop a branch to check for upscaling, we use a lower threshold than
// SAFE_UPPER in productLow
final double a = Math.abs(x);
final double b = Math.abs(y);
if (a + b + Math.abs(xy) >= SAFE_UPPER_S) {
// Only required to scale the largest number as x*y does not overflow.
if (a > b) {
return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE;
}
return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE;
}

// Scaling up here is safe
return productLowUnscaled(x * UP_SCALE, y * UP_SCALE, xy * UP_SCALE2) * DOWN_SCALE2;
}

/**
* Compute the low part of the double length number {@code (z,zz)} for the exact
* product of {@code x} and {@code y}. This is equivalent to computing a {@code double}
* containing the magnitude of the rounding error when converting the exact 106-bit
* significand of the multiplication result to a 53-bit significand.
*
* <p>The method is written to be functionally similar to using a fused multiply add (FMA)
* operation to compute the low part, for example JDK 9's Math.fma function (note the sign
* change in the input argument for the product):
* <pre>
* double x = ...;
* double y = ...;
* double xy = x * y;
* double low1 = Math.fma(x, y, -xy);
* double low2 = DoublePrecision.productLow(x, y, xy);
* </pre>
*
* <p>Special cases:
*
* <ul>
* <li>If {@code x * y} is sub-normal or zero then the result is 0.0.
* <li>If {@code x * y} is infinite or NaN then the result is NaN.
* </ul>
*
* @param x First factor.
* @param y Second factor.
* @param xy Product of the factors (x * y).
* @return the low part of the product double length number
* @see #highPart(double)
* @see #productLow(double, double, double, double, double)
*/
static double productLow0(double x, double y, double xy) {
// Verify the input. This must be NaN safe.
//assert Double.compare(x * y, xy) == 0

// If the number is sub-normal, inf or nan there is no round-off.
if (isNotNormal(xy)) {
// Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan:
return xy - xy;
}

// The result xy is finite and normal.
// Use Dekker's mul12 algorithm that splits the values into high and low parts.
// Dekker's split using multiplication will overflow if the value is within 2^27
// of double max value. It can also produce 26-bit approximations that are larger
// than the input numbers for the high part causing overflow in hx * hy when
// x * y does not overflow. So we must scale down big numbers.
// We only have to scale the largest number as we know the product does not overflow
// (if one is too big then the other cannot be).
// We also scale if the product is close to overflow to avoid intermediate overflow.
// This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
// but is included here to have a single low probability branch condition.

// Add the absolute inputs for a single comparison. The sum will not be more than
// 3-fold higher than any component.
final double a = Math.abs(x);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -355,8 +355,9 @@ public static class RoundoffMethod {
* The name of the method.
*/
@Param({NONE, "multiply", "multiplyUnscaled",
"productLow", "productLow1", "productLow2", "productLow3", "productLowSplit",
"productLowUnscaled"})
"productLow", "productLowS",
"productLow0", "productLow1", "productLow2", "productLow3", "productLowSplit",
"productLowUnscaled", "fma"})
private String name;

/** The function. */
Expand Down Expand Up @@ -388,6 +389,10 @@ public void setup() {
};
} else if ("productLow".equals(name)) {
fun = (x, y) -> DoublePrecision.productLow(x, y, x * y);
} else if ("productLowS".equals(name)) {
fun = (x, y) -> DoublePrecision.productLowS(x, y, x * y);
} else if ("productLow0".equals(name)) {
fun = (x, y) -> DoublePrecision.productLow0(x, y, x * y);
} else if ("productLow1".equals(name)) {
fun = (x, y) -> DoublePrecision.productLow1(x, y, x * y);
} else if ("productLow2".equals(name)) {
Expand All @@ -404,6 +409,8 @@ public void setup() {
};
} else if ("productLowUnscaled".equals(name)) {
fun = (x, y) -> DoublePrecision.productLowUnscaled(x, y, x * y);
} else if ("fma".equals(name)) {
fun = (x, y) -> Math.fma(x, y, -x * y);
} else {
throw new IllegalStateException("Unknown round-off method: " + name);
}
Expand Down

0 comments on commit fdcef0e

Please sign in to comment.