NUMBERS-204: Update DoubleSplitPerformance with the sub-normal upscaling

apache · Aug 31, 2023 · fdcef0e · fdcef0e
1 parent c845758
commit fdcef0e
Show file tree

Hide file tree

Showing 2 changed files with 171 additions and 2 deletions.
diff --git a/...mples-jmh/src/main/java/org/apache/commons/numbers/examples/jmh/core/DoublePrecision.java b/...mples-jmh/src/main/java/org/apache/commons/numbers/examples/jmh/core/DoublePrecision.java
@@ -60,6 +60,9 @@ final class DoublePrecision {
      * Assuming the multiplier is above 2^27 and the maximum exponent is 1023 then a safe
      * limit is a value with an exponent of (1023 - 27) = 2^996. */
     private static final double SAFE_UPPER = 0x1.0p996;
+    /** The lower limit for a product {@code x * y} below which the round-off component may be
+     * sub-normal. This is set as 2^-1022 * 2^54. */
+    private static final double SAFE_LOWER = 0x1.0p-968;
 
     /** The scale to use when down-scaling during a split into a high part.
      * This must be smaller than the inverse of the multiplier and a power of 2 for exact scaling. */
@@ -69,6 +72,13 @@ final class DoublePrecision {
      * This is the inverse of {@link #DOWN_SCALE}. */
     private static final double UP_SCALE = 0x1.0p30;
 
+    /** The upscale factor squared. */
+    private static final double UP_SCALE2 = 0x1.0p60;
+    /** The downscale factor squared. */
+    private static final double DOWN_SCALE2 = 0x1.0p-60;
+    /** The safe upper limit so the product {@code x * y} can be upscaled by 2^60. */
+    private static final double SAFE_UPPER_S = 0x1.0p963;
+
     /** The mask to zero the lower 27-bits of a long . */
     private static final long ZERO_LOWER_27_BITS = 0xffff_ffff_f800_0000L;
 
@@ -402,6 +412,158 @@ static double productLow(double x, double y, double xy) {
         // This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
         // but is included here to have a single low probability branch condition.
 
+        // Add the absolute inputs for a single comparison. The sum will not be more than
+        // 3-fold higher than any component.
+        final double a = Math.abs(x);
+        final double b = Math.abs(y);
+        final double ab = Math.abs(xy);
+        if (a + b + ab >= SAFE_UPPER) {
+            // Only required to scale the largest number as x*y does not overflow.
+            if (a > b) {
+                return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE;
+            }
+            return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE;
+        }
+
+        // The result is computed using a product of the low parts.
+        // To avoid underflow in the low parts we note that these are approximately a factor
+        // of 2^27 smaller than the original inputs so their product will be ~2^54 smaller
+        // than the product xy. Ensure the product is at least 2^54 above a sub-normal.
+        if (ab <= SAFE_LOWER) {
+            // Scaling up here is safe: the largest magnitude cannot be above SAFE_LOWER / MIN_VALUE.
+            return productLowUnscaled(x * UP_SCALE, y * UP_SCALE, xy * UP_SCALE2) * DOWN_SCALE2;
+        }
+
+        // No scaling required
+        return productLowUnscaled(x, y, xy);
+    }
+
+
+    /**
+     * Compute the low part of the double length number {@code (z,zz)} for the exact
+     * product of {@code x} and {@code y}. This is equivalent to computing a {@code double}
+     * containing the magnitude of the rounding error when converting the exact 106-bit
+     * significand of the multiplication result to a 53-bit significand.
+     *
+     * <p>The method is written to be functionally similar to using a fused multiply add (FMA)
+     * operation to compute the low part, for example JDK 9's Math.fma function (note the sign
+     * change in the input argument for the product):
+     * <pre>
+     *  double x = ...;
+     *  double y = ...;
+     *  double xy = x * y;
+     *  double low1 = Math.fma(x, y, -xy);
+     *  double low2 = DoublePrecision.productLow(x, y, xy);
+     * </pre>
+     *
+     * <p>Special cases:
+     *
+     * <ul>
+     *  <li>If {@code x * y} is sub-normal or zero then the result is 0.0.
+     *  <li>If {@code x * y} is infinite or NaN then the result is NaN.
+     * </ul>
+     *
+     * @param x First factor.
+     * @param y Second factor.
+     * @param xy Product of the factors (x * y).
+     * @return the low part of the product double length number
+     * @see #highPart(double)
+     * @see #productLow(double, double, double, double, double)
+     */
+    static double productLowS(double x, double y, double xy) {
+        // Verify the input. This must be NaN safe.
+        //assert Double.compare(x * y, xy) == 0
+
+        // If the number is sub-normal, inf or nan there is no round-off.
+        if (isNotNormal(xy)) {
+            // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan:
+            return xy - xy;
+        }
+
+        // The result xy is finite and normal.
+        // Use Dekker's mul12 algorithm that splits the values into high and low parts.
+        // Dekker's split using multiplication will overflow if the value is within 2^27
+        // of double max value. It can also produce 26-bit approximations that are larger
+        // than the input numbers for the high part causing overflow in hx * hy when
+        // x * y does not overflow. So we must scale down big numbers.
+        // We only have to scale the largest number as we know the product does not overflow
+        // (if one is too big then the other cannot be).
+        // We also scale if the product is close to overflow to avoid intermediate overflow.
+        // This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
+        // but is included here to have a single low probability branch condition.
+
+        // Add the absolute inputs for a single comparison. The sum will not be more than
+        // 3-fold higher than any component.
+
+        // Note: To drop a branch to check for upscaling, we use a lower threshold than
+        // SAFE_UPPER in productLow
+        final double a = Math.abs(x);
+        final double b = Math.abs(y);
+        if (a + b + Math.abs(xy) >= SAFE_UPPER_S) {
+            // Only required to scale the largest number as x*y does not overflow.
+            if (a > b) {
+                return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE;
+            }
+            return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE;
+        }
+
+        // Scaling up here is safe
+        return productLowUnscaled(x * UP_SCALE, y * UP_SCALE, xy * UP_SCALE2) * DOWN_SCALE2;
+    }
+
+    /**
+     * Compute the low part of the double length number {@code (z,zz)} for the exact
+     * product of {@code x} and {@code y}. This is equivalent to computing a {@code double}
+     * containing the magnitude of the rounding error when converting the exact 106-bit
+     * significand of the multiplication result to a 53-bit significand.
+     *
+     * <p>The method is written to be functionally similar to using a fused multiply add (FMA)
+     * operation to compute the low part, for example JDK 9's Math.fma function (note the sign
+     * change in the input argument for the product):
+     * <pre>
+     *  double x = ...;
+     *  double y = ...;
+     *  double xy = x * y;
+     *  double low1 = Math.fma(x, y, -xy);
+     *  double low2 = DoublePrecision.productLow(x, y, xy);
+     * </pre>
+     *
+     * <p>Special cases:
+     *
+     * <ul>
+     *  <li>If {@code x * y} is sub-normal or zero then the result is 0.0.
+     *  <li>If {@code x * y} is infinite or NaN then the result is NaN.
+     * </ul>
+     *
+     * @param x First factor.
+     * @param y Second factor.
+     * @param xy Product of the factors (x * y).
+     * @return the low part of the product double length number
+     * @see #highPart(double)
+     * @see #productLow(double, double, double, double, double)
+     */
+    static double productLow0(double x, double y, double xy) {
+        // Verify the input. This must be NaN safe.
+        //assert Double.compare(x * y, xy) == 0
+
+        // If the number is sub-normal, inf or nan there is no round-off.
+        if (isNotNormal(xy)) {
+            // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan:
+            return xy - xy;
+        }
+
+        // The result xy is finite and normal.
+        // Use Dekker's mul12 algorithm that splits the values into high and low parts.
+        // Dekker's split using multiplication will overflow if the value is within 2^27
+        // of double max value. It can also produce 26-bit approximations that are larger
+        // than the input numbers for the high part causing overflow in hx * hy when
+        // x * y does not overflow. So we must scale down big numbers.
+        // We only have to scale the largest number as we know the product does not overflow
+        // (if one is too big then the other cannot be).
+        // We also scale if the product is close to overflow to avoid intermediate overflow.
+        // This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4)
+        // but is included here to have a single low probability branch condition.
+
         // Add the absolute inputs for a single comparison. The sum will not be more than
         // 3-fold higher than any component.
         final double a = Math.abs(x);

diff --git a/...mh/src/main/java/org/apache/commons/numbers/examples/jmh/core/DoubleSplitPerformance.java b/...mh/src/main/java/org/apache/commons/numbers/examples/jmh/core/DoubleSplitPerformance.java
@@ -355,8 +355,9 @@ public static class RoundoffMethod {
          * The name of the method.
          */
         @Param({NONE, "multiply", "multiplyUnscaled",
-            "productLow", "productLow1", "productLow2", "productLow3", "productLowSplit",
-            "productLowUnscaled"})
+            "productLow", "productLowS",
+            "productLow0", "productLow1", "productLow2", "productLow3", "productLowSplit",
+            "productLowUnscaled", "fma"})
         private String name;
 
         /** The function. */
@@ -388,6 +389,10 @@ public void setup() {
                 };
             } else if ("productLow".equals(name)) {
                 fun = (x, y) -> DoublePrecision.productLow(x, y, x * y);
+            } else if ("productLowS".equals(name)) {
+                fun = (x, y) -> DoublePrecision.productLowS(x, y, x * y);
+            } else if ("productLow0".equals(name)) {
+                fun = (x, y) -> DoublePrecision.productLow0(x, y, x * y);
             } else if ("productLow1".equals(name)) {
                 fun = (x, y) -> DoublePrecision.productLow1(x, y, x * y);
             } else if ("productLow2".equals(name)) {
@@ -404,6 +409,8 @@ public void setup() {
                 };
             } else if ("productLowUnscaled".equals(name)) {
                 fun = (x, y) -> DoublePrecision.productLowUnscaled(x, y, x * y);
+            } else if ("fma".equals(name)) {
+                fun = (x, y) -> Math.fma(x, y, -x * y);
             } else {
                 throw new IllegalStateException("Unknown round-off method: " + name);
             }