From 75171ee3b2c57692962490146f36ac6ac98b22b1 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 14 Jan 2022 14:20:02 -0500 Subject: [PATCH 01/76] adding comments to Frequencies.java --- .../cram/compression/rans/Frequencies.java | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies.java index c174ad1396..52d4d9864e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies.java @@ -99,7 +99,7 @@ static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { // Compute statistics final int[] F = new int[RANS.NUMBER_OF_SYMBOLS]; - int T = 0; + int T = 0; //// T is the total number of symbols in the input for (int i = 0; i < inSize; i++) { F[0xFF & inBuffer.get()]++; T++; @@ -107,8 +107,10 @@ static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { final long tr = ((long) Constants.TOTFREQ << 31) / T + (1 << 30) / T; // Normalise so T[i] == TOTFREQ + // m is the maximum frequency value + // M is the symbol that has the maximum frequency int m = 0; - int M = 0; // frequency denominator ? + int M = 0; for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { if (m < F[j]) { m = F[j]; @@ -121,13 +123,18 @@ static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { if (F[j] == 0) { continue; } + // using tr to normalize symbol frequencies such that their total = (1<<12) = 4096 if ((F[j] = (int) ((F[j] * tr) >> 31)) == 0) { + // make sure that a non-zero symbol frequency is not incorrectly set to 0. + // Change it to 1 if the calculated value is 0. F[j] = 1; } fsum += F[j]; } fsum++; + // adjust the frequency of the symbol with maximum frequency to make sure that + // the sum of frequencies of all the symbols = 4096 if (fsum < Constants.TOTFREQ) { F[M] += Constants.TOTFREQ - fsum; } else { @@ -191,11 +198,15 @@ static int[][] calcFrequenciesOrder1(final ByteBuffer in) { static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { final int[] C = new int[RANS.NUMBER_OF_SYMBOLS]; + // T = running sum of frequencies including the current symbol + // F[j] = frequency of symbol "j" + // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") int T = 0; for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { C[j] = T; T += F[j]; if (F[j] != 0) { + //For each symbol, set start = cumulative frequency and freq = frequency syms[j].set(C[j], F[j], Constants.TF_SHIFT); } } @@ -212,7 +223,14 @@ static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { if (rle != 0) { rle--; } else { + // write the symbol if it is the first symbol or if rle = 0. + // if rle != 0, then skip writing the symbol. cp.put((byte) j); + // We've encoded two symbol frequencies in a row. + // How many more are there? Store that count so + // we can avoid writing consecutive symbols. + // Note: maximum possible rle = 254 + // rle requires atmost 1 byte if (rle == 0 && j != 0 && F[j - 1] != 0) { for (rle = j + 1; rle < 256 && F[rle] != 0; rle++) ; @@ -225,12 +243,14 @@ static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { if (F[j] < 128) { cp.put((byte) (F[j])); } else { + // if F[j] >127, it is written in 2 bytes cp.put((byte) (128 | (F[j] >> 8))); cp.put((byte) (F[j] & 0xff)); } } } + // write 0 indicating the end of frequency table cp.put((byte) 0); return cp.position() - start; } From 7821469e90125845d78de1cd26ea1692d5fe78a5 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 1 Mar 2022 11:27:22 -0500 Subject: [PATCH 02/76] separate encode and decode classes --- .../cram/compression/ExternalCompressor.java | 10 +- .../compression/RANSExternalCompressor.java | 41 ++- .../compression/rans/ArithmeticDecoder.java | 4 +- .../cram/compression/rans/Constants.java | 6 +- .../samtools/cram/compression/rans/D04.java | 8 +- .../samtools/cram/compression/rans/D0N.java | 51 ++++ .../samtools/cram/compression/rans/D14.java | 8 +- .../samtools/cram/compression/rans/E04.java | 8 +- .../samtools/cram/compression/rans/E0N.java | 60 +++++ .../samtools/cram/compression/rans/E14.java | 8 +- .../{Frequencies.java => Frequencies4x8.java} | 36 +-- .../compression/rans/FrequenciesNx16.java | 205 +++++++++++++++ .../samtools/cram/compression/rans/RANS.java | 233 ----------------- .../cram/compression/rans/RANS4x8Decode.java | 59 +++++ .../cram/compression/rans/RANS4x8Encode.java | 112 ++++++++ .../cram/compression/rans/RANS4x8Params.java | 16 ++ .../cram/compression/rans/RANSDecode.java | 48 ++++ .../compression/rans/RANSDecodingSymbol.java | 21 +- .../cram/compression/rans/RANSEncode.java | 49 ++++ .../compression/rans/RANSEncodingSymbol.java | 38 ++- .../cram/compression/rans/RANSNx16Decode.java | 69 +++++ .../cram/compression/rans/RANSNx16Encode.java | 91 +++++++ .../cram/compression/rans/RANSNx16Params.java | 80 ++++++ .../cram/compression/rans/RANSParams.java | 19 ++ .../samtools/cram/compression/rans/Utils.java | 44 +++- .../CompressionHeaderEncodingMap.java | 26 +- .../cram/structure/CompressorCache.java | 19 +- .../htsjdk/samtools/cram/CRAMCodecCorpus.java | 45 ++++ .../samtools/cram/CRAMCodecCorpusTest.java | 247 ++++++++++++++++++ .../samtools/cram/build/SliceFactoryTest.java | 1 - .../cram/compression/rans/RansTest.java | 221 +++++++++++----- 31 files changed, 1505 insertions(+), 378 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/D0N.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/E0N.java rename src/main/java/htsjdk/samtools/cram/compression/rans/{Frequencies.java => Frequencies4x8.java} (89%) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANS.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Params.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java create mode 100644 src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java create mode 100644 src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java index 4bc70ff46d..5bf27afbea 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression; -import htsjdk.samtools.cram.compression.rans.RANS; +import htsjdk.samtools.cram.compression.rans.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.RANS4x8Encode; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import htsjdk.utils.ValidationUtils; @@ -71,8 +72,8 @@ public static ExternalCompressor getCompressorForMethod( case RANS: return compressorSpecificArg == NO_COMPRESSION_ARG ? - new RANSExternalCompressor(new RANS()) : - new RANSExternalCompressor(compressorSpecificArg, new RANS()); + new RANSExternalCompressor(new RANS4x8Encode(), new RANS4x8Decode()) : + new RANSExternalCompressor(compressorSpecificArg, new RANS4x8Encode(), new RANS4x8Decode()); case BZIP2: ValidationUtils.validateArg( @@ -85,5 +86,4 @@ public static ExternalCompressor getCompressorForMethod( } } -} - +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java index 24a3f99c7f..e89df5353d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java @@ -24,48 +24,61 @@ */ package htsjdk.samtools.cram.compression; -import htsjdk.samtools.cram.compression.rans.RANS; +import htsjdk.samtools.cram.compression.rans.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.RANS4x8Params; +import htsjdk.samtools.cram.compression.rans.RANSParams; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import java.nio.ByteBuffer; import java.util.Objects; public final class RANSExternalCompressor extends ExternalCompressor { - private final RANS.ORDER order; - private final RANS rans; + private final RANSParams.ORDER order; + private final RANS4x8Encode ransEncode; + private final RANS4x8Decode ransDecode; /** * We use a shared RANS instance for all compressors. * @param rans */ - public RANSExternalCompressor(final RANS rans) { - this(RANS.ORDER.ZERO, rans); + public RANSExternalCompressor( + final RANS4x8Encode ransEncode, + final RANS4x8Decode ransDecode) { + this(RANSParams.ORDER.ZERO, ransEncode, ransDecode); } - public RANSExternalCompressor(final int order, final RANS rans) { - this(RANS.ORDER.fromInt(order), rans); + public RANSExternalCompressor( + final int order, + final RANS4x8Encode ransEncode, + final RANS4x8Decode ransDecode) { + this(RANSParams.ORDER.fromInt(order), ransEncode, ransDecode); } - public RANSExternalCompressor(final RANS.ORDER order, final RANS rans) { + public RANSExternalCompressor( + final RANSParams.ORDER order, + final RANS4x8Encode ransEncode, + final RANS4x8Decode ransDecode) { super(BlockCompressionMethod.RANS); - this.rans = rans; + this.ransEncode = ransEncode; + this.ransDecode = ransDecode; this.order = order; } @Override public byte[] compress(final byte[] data) { - final ByteBuffer buffer = rans.compress(ByteBuffer.wrap(data), order); + final RANS4x8Params params = new RANS4x8Params(order); + final ByteBuffer buffer = ransEncode.compress(ByteBuffer.wrap(data), params); return toByteArray(buffer); } @Override public byte[] uncompress(byte[] data) { - final ByteBuffer buf = rans.uncompress(ByteBuffer.wrap(data)); + final RANS4x8Params params = new RANS4x8Params(order); + final ByteBuffer buf = ransDecode.uncompress(ByteBuffer.wrap(data), params); return toByteArray(buf); } - public RANS.ORDER getOrder() { return order; } - @Override public String toString() { return String.format("%s(%s)", this.getMethod(), order); @@ -96,4 +109,4 @@ private byte[] toByteArray(final ByteBuffer buffer) { return bytes; } -} +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java index 1e57c0886a..f89a8e5e30 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java @@ -27,7 +27,7 @@ final class ArithmeticDecoder { final FC[] fc = new FC[256]; - // reverse lookup table ? + // reverse lookup table byte[] R = new byte[Constants.TOTFREQ]; public ArithmeticDecoder() { @@ -45,4 +45,4 @@ public void reset() { } } -} +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java index 7c7545bfbe..9e5ad9fc5b 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java @@ -3,5 +3,7 @@ final class Constants { static final int TF_SHIFT = 12; static final int TOTFREQ = (1 << TF_SHIFT); // 4096 - static final int RANS_BYTE_L = 1 << 23; -} + static final int RANS_BYTE_L_4x8 = 1 << 23; + static final int RANS_BYTE_L_Nx16 = 1 << 15; + static final int NUMBER_OF_SYMBOLS = 256; +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/D04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/D04.java index e9d9941575..b3838b5b66 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/D04.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/D04.java @@ -32,10 +32,10 @@ static void uncompress( rans2 = syms[0xFF & c2].advanceSymbolStep(rans2, Constants.TF_SHIFT); rans3 = syms[0xFF & c3].advanceSymbolStep(rans3, Constants.TF_SHIFT); - rans0 = Utils.RANSDecodeRenormalize(rans0, inBuffer); - rans1 = Utils.RANSDecodeRenormalize(rans1, inBuffer); - rans2 = Utils.RANSDecodeRenormalize(rans2, inBuffer); - rans3 = Utils.RANSDecodeRenormalize(rans3, inBuffer); + rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); + rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); + rans2 = Utils.RANSDecodeRenormalize4x8(rans2, inBuffer); + rans3 = Utils.RANSDecodeRenormalize4x8(rans3, inBuffer); } outBuffer.position(out_end); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/D0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/D0N.java new file mode 100644 index 0000000000..6a9ba40b86 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/D0N.java @@ -0,0 +1,51 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; + +public class D0N { + static void uncompress( + final ByteBuffer inBuffer, + ArithmeticDecoder D, + RANSDecodingSymbol[] syms, + final ByteBuffer outBuffer, + final int out_sz, + final int Nway) { + + // Nway parallel rans states. Nway = 4 or 32 + final int[] rans = new int[Nway]; + + // c is the array of decoded symbols + final byte[] c = new byte[Nway]; + int r; + for (r=0; r0){ + byte symbol = D.R[Utils.RANSDecodeGet(rans[rev_idx], Constants.TF_SHIFT)]; + syms[0xFF & symbol].advanceSymbolNx16(rans[rev_idx], inBuffer, Constants.TF_SHIFT); + outBuffer.put(symbol); + remSize --; + rev_idx ++; + } + outBuffer.position(0); + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/D14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/D14.java index ba7d598d9e..899905027c 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/D14.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/D14.java @@ -42,10 +42,10 @@ static void uncompress( rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TF_SHIFT); rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TF_SHIFT); - rans0 = Utils.RANSDecodeRenormalize(rans0, inBuffer); - rans1 = Utils.RANSDecodeRenormalize(rans1, inBuffer); - rans2 = Utils.RANSDecodeRenormalize(rans2, inBuffer); - rans7 = Utils.RANSDecodeRenormalize(rans7, inBuffer); + rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); + rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); + rans2 = Utils.RANSDecodeRenormalize4x8(rans2, inBuffer); + rans7 = Utils.RANSDecodeRenormalize4x8(rans7, inBuffer); l0 = c0; l1 = c1; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/E04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/E04.java index 709c7096b0..f94e648194 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/E04.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/E04.java @@ -10,10 +10,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[] syms, int rans0, rans1, rans2, rans3; final ByteBuffer ptr = cp.slice(); - rans0 = Constants.RANS_BYTE_L; - rans1 = Constants.RANS_BYTE_L; - rans2 = Constants.RANS_BYTE_L; - rans3 = Constants.RANS_BYTE_L; + rans0 = Constants.RANS_BYTE_L_4x8; + rans1 = Constants.RANS_BYTE_L_4x8; + rans2 = Constants.RANS_BYTE_L_4x8; + rans3 = Constants.RANS_BYTE_L_4x8; int i; switch (i = (in_size & 3)) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/E0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/E0N.java new file mode 100644 index 0000000000..64de2dea77 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/E0N.java @@ -0,0 +1,60 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; + +public class E0N { + static int compress( + final ByteBuffer inBuffer, + final RANSEncodingSymbol[] syms, + final ByteBuffer cp, + final int Nway) { + final int cdata_size; + final int in_size = inBuffer.remaining(); + final ByteBuffer ptr = cp.slice(); + final int[] rans = new int[Nway]; + final int[] c = new int[Nway]; // c is the array of symbols + int r; + for (r=0; r0){ + + // encode remaining elements first + int symbol_ =0xFF & inBuffer.get(in_size - rev_idx); + rans[remSize - 1] = syms[symbol_].putSymbolNx16(rans[remSize - 1], ptr); + remSize --; + rev_idx ++; + } + int i; + + for (i = (in_size - (in_size%Nway)); i > 0; i -= Nway) { + for (r = Nway - 1; r >= 0; r--){ + + // encode using Nway parallel rans states. Nway = 4 or 32 + c[r] = 0xFF & inBuffer.get(i - (Nway - r)); + rans[r] = syms[c[r]].putSymbolNx16(rans[r], ptr); + } + } + for (i=Nway-1; i>=0; i--){ + ptr.putInt(rans[i]); + } + ptr.position(); + ptr.flip(); + cdata_size = ptr.limit(); + + // since the data is encoded in reverse order, + // reverse the compressed bytes, so that it is in correct order when uncompressed. + Utils.reverse(ptr); + inBuffer.position(inBuffer.limit()); + return cdata_size; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/E14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/E14.java index 37f2767137..457e6fcd48 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/E14.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/E14.java @@ -9,10 +9,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms final int in_size = inBuffer.remaining(); final int compressedBlobSize; int rans0, rans1, rans2, rans3; - rans0 = Constants.RANS_BYTE_L; - rans1 = Constants.RANS_BYTE_L; - rans2 = Constants.RANS_BYTE_L; - rans3 = Constants.RANS_BYTE_L; + rans0 = Constants.RANS_BYTE_L_4x8; + rans1 = Constants.RANS_BYTE_L_4x8; + rans2 = Constants.RANS_BYTE_L_4x8; + rans3 = Constants.RANS_BYTE_L_4x8; /* * Slicing is needed for buffer reversing later. diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies4x8.java similarity index 89% rename from src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies4x8.java index 52d4d9864e..e401039a7a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies4x8.java @@ -7,7 +7,7 @@ // F = scaled integer frequencies // M = sum(fs) -final class Frequencies { +final class Frequencies4x8 { static void readStatsOrder0(final ByteBuffer cp, final ArithmeticDecoder decoder, final RANSDecodingSymbol[] decodingSymbols) { // Pre-compute reverse lookup of frequency. @@ -98,7 +98,7 @@ static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { final int inSize = inBuffer.remaining(); // Compute statistics - final int[] F = new int[RANS.NUMBER_OF_SYMBOLS]; + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; int T = 0; //// T is the total number of symbols in the input for (int i = 0; i < inSize; i++) { F[0xFF & inBuffer.get()]++; @@ -111,7 +111,7 @@ static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { // M is the symbol that has the maximum frequency int m = 0; int M = 0; - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (m < F[j]) { m = F[j]; M = j; @@ -119,7 +119,7 @@ static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { } int fsum = 0; - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F[j] == 0) { continue; } @@ -148,8 +148,8 @@ static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { static int[][] calcFrequenciesOrder1(final ByteBuffer in) { final int in_size = in.remaining(); - final int[][] F = new int[RANS.NUMBER_OF_SYMBOLS][RANS.NUMBER_OF_SYMBOLS]; - final int[] T = new int[RANS.NUMBER_OF_SYMBOLS]; + final int[][] F = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + final int[] T = new int[Constants.NUMBER_OF_SYMBOLS]; int c; int last_i = 0; @@ -163,14 +163,14 @@ static int[][] calcFrequenciesOrder1(final ByteBuffer in) { F[0][0xFF & in.get(3 * (in_size >> 2))]++; T[0] += 3; - for (int i = 0; i < RANS.NUMBER_OF_SYMBOLS; i++) { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { if (T[i] == 0) { continue; } final double p = ((double) Constants.TOTFREQ) / T[i]; int t2 = 0, m = 0, M = 0; - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F[i][j] == 0) continue; @@ -196,13 +196,13 @@ static int[][] calcFrequenciesOrder1(final ByteBuffer in) { } static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { - final int[] C = new int[RANS.NUMBER_OF_SYMBOLS]; + final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; // T = running sum of frequencies including the current symbol // F[j] = frequency of symbol "j" // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") int T = 0; - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { C[j] = T; T += F[j]; if (F[j] != 0) { @@ -217,7 +217,7 @@ static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { final int start = cp.position(); int rle = 0; - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F[j] != 0) { // j if (rle != 0) { @@ -256,10 +256,10 @@ static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { } static RANSEncodingSymbol[][] buildSymsOrder1(final int[][] F, final RANSEncodingSymbol[][] syms) { - for (int i = 0; i < RANS.NUMBER_OF_SYMBOLS; i++) { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { final int[] F_i_ = F[i]; int x = 0; - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F_i_[j] != 0) { syms[i][j].set(x, F_i_[j], Constants.TF_SHIFT); x += F_i_[j]; @@ -272,16 +272,16 @@ static RANSEncodingSymbol[][] buildSymsOrder1(final int[][] F, final RANSEncodin static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) { final int start = cp.position(); - final int[] T = new int[RANS.NUMBER_OF_SYMBOLS]; + final int[] T = new int[Constants.NUMBER_OF_SYMBOLS]; - for (int i = 0; i < RANS.NUMBER_OF_SYMBOLS; i++) { - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { T[i] += F[i][j]; } } int rle_i = 0; - for (int i = 0; i < RANS.NUMBER_OF_SYMBOLS; i++) { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { if (T[i] == 0) { continue; } @@ -304,7 +304,7 @@ static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) { final int[] F_i_ = F[i]; int rle_j = 0; - for (int j = 0; j < RANS.NUMBER_OF_SYMBOLS; j++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F_i_[j] != 0) { // j diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java b/src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java new file mode 100644 index 0000000000..d77c1a7898 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java @@ -0,0 +1,205 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; +import java.util.Arrays; + +public class FrequenciesNx16 { + + static int[] readAlphabet(final ByteBuffer cp){ + // gets the list of alphabets whose frequency!=0 + final int[] A = new int[Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + A[i]=0; + } + int rle = 0; + int sym = cp.get() & 0xFF; + int last_sym = sym; + do { + A[sym] = 1; + if (rle!=0) { + rle--; + sym++; + } else { + sym = cp.get() & 0xFF; + if (sym == last_sym+1) + rle = cp.get() & 0xFF; + } + last_sym = sym; + } while (sym != 0); + return A; + } + + static void readStatsOrder0( + final ByteBuffer cp, + ArithmeticDecoder decoder, + RANSDecodingSymbol[] decodingSymbols) { + // Use the Frequency table to set the values of F, C and R + final int[] A = readAlphabet(cp); + int x = 0; + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + + // read F, normalise F then calculate C and R + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (A[j] > 0) { + if ((F[j] = (cp.get() & 0xFF)) >= 128){ + F[j] &= ~128; + F[j] = (( F[j] &0x7f) << 7) | (cp.get() & 0x7F); + } + } + } + normaliseFrequenciesOrder0(F,12); + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if(A[j]>0){ + + // decoder.fc[j].F -> Frequency + // decoder.fc[j].C -> Cumulative Frequency preceding the current symbol + decoder.fc[j].F = F[j]; + decoder.fc[j].C = x; + decodingSymbols[j].set(decoder.fc[j].C, decoder.fc[j].F); + + // R -> Reverse Lookup table + Arrays.fill(decoder.R, x, x + decoder.fc[j].F, (byte) j); + x += decoder.fc[j].F; + } + } + } + + static int[] buildFrequenciesOrder0(final ByteBuffer inBuffer) { + // Returns an array of raw symbol frequencies + final int inSize = inBuffer.remaining(); + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < inSize; i++) { + F[0xFF & inBuffer.get()]++; + } + return F; + } + + static int[] normaliseFrequenciesOrder0(final int[] F, final int bits) { + // Returns an array of normalised Frequencies, + // such that the frequencies add up to 1<> 31)) == 0) { + + // A non-zero symbol frequency should not be incorrectly set to 0. + // If the calculated value is 0, change it to 1 + F[j] = 1; + } + fsum += F[j]; + } + + // adjust the frequency of the symbol "M" such that + // the sum of frequencies of all the symbols = renormFreq + if (fsum < renormFreq) { + F[M] += renormFreq - fsum; + } else if (fsum > renormFreq){ + F[M] -= fsum - renormFreq; + } + return F; + } + + static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { + // updates the RANSEncodingSymbol array for all the symbols + final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; + + // T = running sum of frequencies including the current symbol + // F[j] = frequency of symbol "j" + // C[j] = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j") + int T = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + C[j] = T; + T += F[j]; + if (F[j] != 0) { + + //For each symbol, set start = cumulative frequency and freq = frequency + syms[j].set(C[j], F[j], Constants.TF_SHIFT); + } + } + return syms; + } + + static void writeAlphabet(final ByteBuffer cp, final int[] F) { + // Uses Run Length Encoding to write all the symbols whose frequency!=0 + int rle = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[j] != 0) { + if (rle != 0) { + rle--; + } else { + + // write the symbol if it is the first symbol or if rle = 0. + // if rle != 0, then skip writing the symbol. + cp.put((byte) j); + + // We've encoded two symbol frequencies in a row. + // How many more are there? Store that count so + // we can avoid writing consecutive symbols. + // Note: maximum possible rle = 254 + // rle requires atmost 1 byte + if (rle == 0 && j != 0 && F[j - 1] != 0) { + for (rle = j + 1; rle < Constants.NUMBER_OF_SYMBOLS && F[rle] != 0; rle++); + rle -= j + 1; + cp.put((byte) rle); + } + } + } + } + + // write 0 indicating the end of alphabet + cp.put((byte) 0); + } + + static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { + // Order 0 frequencies store the complete alphabet of observed + // symbols using run length encoding, followed by a table of frequencies + // for each symbol in the alphabet. + final int start = cp.position(); + + // write the alphabet first and then their frequencies + writeAlphabet(cp,F); + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[j] != 0) { + if (F[j] < 128) { + cp.put((byte) (F[j] & 0x7f)); + } else { + + // if F[j] >127, it is written in 2 bytes + // right shift by 7 and get the most Significant Bits. + // Set the Most Significant Bit of the first byte to 1 indicating that the frequency comprises of 2 bytes + cp.put((byte) (128 | (F[j] >> 7))); + cp.put((byte) (F[j] & 0x7f)); //Least Significant 7 Bits + } + } + } + return cp.position() - start; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS.java deleted file mode 100644 index 8a4e719ff5..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS.java +++ /dev/null @@ -1,233 +0,0 @@ -package htsjdk.samtools.cram.compression.rans; - -import htsjdk.utils.ValidationUtils; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -public final class RANS { - - public enum ORDER { - ZERO, ONE; - - public static ORDER fromInt(final int orderValue) { - try { - return ORDER.values()[orderValue]; - } catch (final ArrayIndexOutOfBoundsException e) { - throw new IllegalArgumentException("Unknown rANS order: " + orderValue); - } - } - } - - // A compressed rANS stream consists of a prefix containing 3 values, followed by the compressed data block: - // byte - order of the codec (0 or 1) - // int - total compressed size of the frequency table and compressed content - // int - total size of the raw/uncompressed content - // byte[] - frequency table (RLE) - // byte[] - compressed data - - private static final int ORDER_BYTE_LENGTH = 1; - private static final int COMPRESSED_BYTE_LENGTH = 4; - private static final int RAW_BYTE_LENGTH = 4; - private static final int PREFIX_BYTE_LENGTH = ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH + RAW_BYTE_LENGTH; - - // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, - // so always use ORDER-0 - private static final int MINIMUM__ORDER_1_SIZE = 4; - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); - - public static final int NUMBER_OF_SYMBOLS = 256; - - // working variables used by the encoder and decoder; initialize them lazily since - // they consist of lots of small objects, and we don't want to instantiate them - // until we actually use them - private ArithmeticDecoder[] D; - private RANSDecodingSymbol[][] decodingSymbols; - private RANSEncodingSymbol[][] encodingSymbols; - - // Lazy initialization of working memory for the encoder/decoder - private void initializeRANSCoder() { - if (D == null) { - D = new ArithmeticDecoder[NUMBER_OF_SYMBOLS]; - for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { - D[i] = new ArithmeticDecoder(); - } - } else { - for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { - D[i].reset(); - } - } - if (decodingSymbols == null) { - decodingSymbols = new RANSDecodingSymbol[NUMBER_OF_SYMBOLS][NUMBER_OF_SYMBOLS]; - for (int i = 0; i < decodingSymbols.length; i++) { - for (int j = 0; j < decodingSymbols[i].length; j++) { - decodingSymbols[i][j] = new RANSDecodingSymbol(); - } - } - } else { - for (int i = 0; i < decodingSymbols.length; i++) { - for (int j = 0; j < decodingSymbols[i].length; j++) { - decodingSymbols[i][j].set(0, 0); - } - } - } - if (encodingSymbols == null) { - encodingSymbols = new RANSEncodingSymbol[NUMBER_OF_SYMBOLS][NUMBER_OF_SYMBOLS]; - for (int i = 0; i < encodingSymbols.length; i++) { - for (int j = 0; j < encodingSymbols[i].length; j++) { - encodingSymbols[i][j] = new RANSEncodingSymbol(); - } - } - } else { - for (int i = 0; i < encodingSymbols.length; i++) { - for (int j = 0; j < encodingSymbols[i].length; j++) { - encodingSymbols[i][j].reset(); - } - } - } - } - - public ByteBuffer uncompress(final ByteBuffer inBuffer) { - if (inBuffer.remaining() == 0) { - return EMPTY_BUFFER; - } - - initializeRANSCoder(); - - final ORDER order = ORDER.fromInt(inBuffer.get()); - - inBuffer.order(ByteOrder.LITTLE_ENDIAN); - final int inSize = inBuffer.getInt(); - if (inSize != inBuffer.remaining() - RAW_BYTE_LENGTH) { - throw new RuntimeException("Incorrect input length."); - } - final int outSize = inBuffer.getInt(); - final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); - - switch (order) { - case ZERO: - return uncompressOrder0Way4(inBuffer, outBuffer); - - case ONE: - return uncompressOrder1Way4(inBuffer, outBuffer); - - default: - throw new RuntimeException("Unknown rANS order: " + order); - } - } - - public ByteBuffer compress(final ByteBuffer inBuffer, final ORDER order) { - if (inBuffer.remaining() == 0) { - return EMPTY_BUFFER; - } - - initializeRANSCoder(); - - if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { - // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 - return compressOrder0Way4(inBuffer); - } - - switch (order) { - case ZERO: - return compressOrder0Way4(inBuffer); - - case ONE: - return compressOrder1Way4(inBuffer); - - default: - throw new RuntimeException("Unknown rANS order: " + order); - } - } - - private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { - final int inSize = inBuffer.remaining(); - final ByteBuffer outBuffer = allocateOutputBuffer(inSize); - - // move the output buffer ahead to the start of the frequency table (we'll come back and - // write the output stream prefix at the end of this method) - outBuffer.position(PREFIX_BYTE_LENGTH); // start of frequency table - - final int[] F = Frequencies.calcFrequenciesOrder0(inBuffer); - Frequencies.buildSymsOrder0(F, encodingSymbols[0]); - - final ByteBuffer cp = outBuffer.slice(); - final int frequencyTableSize = Frequencies.writeFrequenciesOrder0(cp, F); - - inBuffer.rewind(); - final int compressedBlobSize = E04.compress(inBuffer, encodingSymbols[0], cp); - - // rewind and write the prefix - writeCompressionPrefix(ORDER.ZERO, outBuffer, inSize, frequencyTableSize, compressedBlobSize); - return outBuffer; - } - - private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { - final int inSize = inBuffer.remaining(); - final ByteBuffer outBuffer = allocateOutputBuffer(inSize); - - // move to start of frequency - outBuffer.position(PREFIX_BYTE_LENGTH); - - final int[][] F = Frequencies.calcFrequenciesOrder1(inBuffer); - Frequencies.buildSymsOrder1(F, encodingSymbols); - - final ByteBuffer cp = outBuffer.slice(); - final int frequencyTableSize = Frequencies.writeFrequenciesOrder1(cp, F); - - inBuffer.rewind(); - final int compressedBlobSize = E14.compress(inBuffer, encodingSymbols, cp); - - // rewind and write the prefix - writeCompressionPrefix(ORDER.ONE, outBuffer, inSize, frequencyTableSize, compressedBlobSize); - return outBuffer; - } - - private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { - Frequencies.readStatsOrder0(inBuffer, D[0], decodingSymbols[0]); - D04.uncompress(inBuffer, D[0], decodingSymbols[0], outBuffer); - - return outBuffer; - } - - private ByteBuffer uncompressOrder1Way4(final ByteBuffer in, final ByteBuffer outBuffer) { - Frequencies.readStatsOrder1(in, D, decodingSymbols); - D14.uncompress(in, outBuffer, D, decodingSymbols); - return outBuffer; - } - - private static ByteBuffer allocateOutputBuffer(final int inSize) { - // This calculation is identical to the one in samtools rANS_static.c - // Presumably the frequency table (always big enough for order 1) = 257*257, then * 3 for each entry - // (byte->symbol, 2 bytes -> scaled frequency), + 9 for the header (order byte, and 2 int lengths - // for compressed/uncompressed lengths) ? Plus additional 5% for..., for what ??? - final int compressedSize = (int) (1.05 * inSize + 257 * 257 * 3 + 9); - final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize); - if (outputBuffer.remaining() < compressedSize) { - throw new RuntimeException("Failed to allocate sufficient buffer size for RANS coder."); - } - outputBuffer.order(ByteOrder.LITTLE_ENDIAN); - return outputBuffer; - } - - private static void writeCompressionPrefix( - final ORDER order, - final ByteBuffer outBuffer, - final int inSize, - final int frequencyTableSize, - final int compressedBlobSize) { - ValidationUtils.validateArg(order == ORDER.ONE || order == ORDER.ZERO,"unrecognized RANS order"); - outBuffer.limit(PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize); - - // go back to the beginning of the stream and write the prefix values - // write the (ORDER as a single byte at offset 0) - outBuffer.put(0, (byte) (order == ORDER.ZERO ? 0 : 1)); - outBuffer.order(ByteOrder.LITTLE_ENDIAN); - // move past the ORDER and write the compressed size - outBuffer.putInt(ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize); - // move past the compressed size and write the uncompressed size - outBuffer.putInt(ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH, inSize); - outBuffer.rewind(); - } - -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java new file mode 100644 index 0000000000..24515c919f --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java @@ -0,0 +1,59 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class RANS4x8Decode extends RANSDecode { + + private static final int RAW_BYTE_LENGTH = 4; + private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + + public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANS4x8Params params) { + if (inBuffer.remaining() == 0) { + return EMPTY_BUFFER; + } + initializeRANSDecoder(); + // first byte of compressed stream gives order + final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get()); + + // For RANS decoding, the bytes are read in little endian from the input stream + inBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // compressed bytes length + final int inSize = inBuffer.getInt(); + if (inSize != inBuffer.remaining() - RAW_BYTE_LENGTH) { + throw new RuntimeException("Incorrect input length."); + } + + // uncompressed bytes length + final int outSize = inBuffer.getInt(); + final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + + switch (order) { + case ZERO: + return uncompressOrder0Way4(inBuffer, outBuffer); + + case ONE: + return uncompressOrder1Way4(inBuffer, outBuffer); + + default: + throw new RuntimeException("Unknown rANS order: " + order); + } + } + + private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { + // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols + Frequencies4x8.readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); + D04.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer); + + return outBuffer; + } + + private ByteBuffer uncompressOrder1Way4(final ByteBuffer in, final ByteBuffer outBuffer) { + // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols + Frequencies4x8.readStatsOrder1(in, getD(), getDecodingSymbols()); + D14.uncompress(in, outBuffer, getD(), getDecodingSymbols()); + return outBuffer; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java new file mode 100644 index 0000000000..3788026d8b --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java @@ -0,0 +1,112 @@ +package htsjdk.samtools.cram.compression.rans; + +import htsjdk.utils.ValidationUtils; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class RANS4x8Encode extends RANSEncode { + private static final int ORDER_BYTE_LENGTH = 1; + private static final int COMPRESSED_BYTE_LENGTH = 4; + private static final int RAW_BYTE_LENGTH = 4; + private static final int PREFIX_BYTE_LENGTH = ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH + RAW_BYTE_LENGTH; + + // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, + // so always use ORDER-0 + private static final int MINIMUM__ORDER_1_SIZE = 4; + private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + + + public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params) { + final RANSParams.ORDER order= params.getOrder(); + if (inBuffer.remaining() == 0) { + return EMPTY_BUFFER; + } + initializeRANSEncoder(); + if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { + // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 + return compressOrder0Way4(inBuffer); + } + switch (order) { + case ZERO: + return compressOrder0Way4(inBuffer); + + case ONE: + return compressOrder1Way4(inBuffer); + + default: + throw new RuntimeException("Unknown rANS order: " + params.getOrder()); + } + } + + private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { + final int inSize = inBuffer.remaining(); + final ByteBuffer outBuffer = allocateOutputBuffer(inSize); + + // move the output buffer ahead to the start of the frequency table (we'll come back and + // write the output stream prefix at the end of this method) + outBuffer.position(PREFIX_BYTE_LENGTH); // start of frequency table + + // get the normalised frequencies of the alphabets + final int[] F = Frequencies4x8.calcFrequenciesOrder0(inBuffer); + + // using the normalised frequencies, set the RANSEncodingSymbols + Frequencies4x8.buildSymsOrder0(F, getEncodingSymbols()[0]); + + final ByteBuffer cp = outBuffer.slice(); + + // write Frequency table + final int frequencyTableSize = Frequencies4x8.writeFrequenciesOrder0(cp, F); + + inBuffer.rewind(); + final int compressedBlobSize = E04.compress(inBuffer, getEncodingSymbols()[0], cp); + + // write the prefix at the beginning of the output buffer + writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inSize, frequencyTableSize, compressedBlobSize); + return outBuffer; + } + + private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { + final int inSize = inBuffer.remaining(); + final ByteBuffer outBuffer = allocateOutputBuffer(inSize); + + // move to start of frequency + outBuffer.position(PREFIX_BYTE_LENGTH); + + // get normalized frequencies + final int[][] F = Frequencies4x8.calcFrequenciesOrder1(inBuffer); + + // using the normalised frequencies, set the RANSEncodingSymbols + Frequencies4x8.buildSymsOrder1(F, getEncodingSymbols()); + + final ByteBuffer cp = outBuffer.slice(); + final int frequencyTableSize = Frequencies4x8.writeFrequenciesOrder1(cp, F); + + inBuffer.rewind(); + final int compressedBlobSize = E14.compress(inBuffer, getEncodingSymbols(), cp); + + // write the prefix at the beginning of the output buffer + writeCompressionPrefix(RANSParams.ORDER.ONE, outBuffer, inSize, frequencyTableSize, compressedBlobSize); + return outBuffer; + } + + private static void writeCompressionPrefix( + final RANSParams.ORDER order, + final ByteBuffer outBuffer, + final int inSize, + final int frequencyTableSize, + final int compressedBlobSize) { + ValidationUtils.validateArg(order == RANSParams.ORDER.ONE || order == RANSParams.ORDER.ZERO,"unrecognized RANS order"); + outBuffer.limit(PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize); + + // go back to the beginning of the stream and write the prefix values + // write the (ORDER as a single byte at offset 0) + outBuffer.put(0, (byte) (order == RANSParams.ORDER.ZERO ? 0 : 1)); + outBuffer.order(ByteOrder.LITTLE_ENDIAN); + // move past the ORDER and write the compressed size + outBuffer.putInt(ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize); + // move past the compressed size and write the uncompressed size + outBuffer.putInt(ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH, inSize); + outBuffer.rewind(); + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java new file mode 100644 index 0000000000..0714c08107 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java @@ -0,0 +1,16 @@ +package htsjdk.samtools.cram.compression.rans; + +public class RANS4x8Params implements RANSParams{ + + private ORDER order; + + public RANS4x8Params(ORDER order) { + this.order = order; + } + + @Override + public ORDER getOrder() { + return order; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java new file mode 100644 index 0000000000..cf2f5303b1 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java @@ -0,0 +1,48 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; + +public abstract class RANSDecode { + private ArithmeticDecoder[] D; + private RANSDecodingSymbol[][] decodingSymbols; + + // GETTERS + public ArithmeticDecoder[] getD() { + return D; + } + + public RANSDecodingSymbol[][] getDecodingSymbols() { + return decodingSymbols; + } + + abstract ByteBuffer uncompress(final ByteBuffer inBuffer, final T params); + + // Lazy initialization of working memory for the decoder + protected void initializeRANSDecoder() { + if (D == null) { + D = new ArithmeticDecoder[Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + D[i] = new ArithmeticDecoder(); + } + } else { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + D[i].reset(); + } + } + if (decodingSymbols == null) { + decodingSymbols = new RANSDecodingSymbol[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < decodingSymbols.length; i++) { + for (int j = 0; j < decodingSymbols[i].length; j++) { + decodingSymbols[i][j] = new RANSDecodingSymbol(); + } + } + } else { + for (int i = 0; i < decodingSymbols.length; i++) { + for (int j = 0; j < decodingSymbols[i].length; j++) { + decodingSymbols[i][j].set(0, 0); + } + } + } + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java index 3dde8f8c02..49a18f2904 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java @@ -60,11 +60,28 @@ public int advanceSymbol(final int rIn, final ByteBuffer byteBuffer, final int s r = freq * (r >> scaleBits) + (r & mask) - start; // re-normalize - if (r < Constants.RANS_BYTE_L) { + if (r < Constants.RANS_BYTE_L_4x8) { do { final int b = 0xFF & byteBuffer.get(); r = (r << 8) | b; - } while (r < Constants.RANS_BYTE_L); + } while (r < Constants.RANS_BYTE_L_4x8); + } + + return r; + } + + public int advanceSymbolNx16(final int rIn, final ByteBuffer byteBuffer, final int scaleBits) { + final int mask = (1 << scaleBits) - 1; + + // s, x = D(x) + int r = rIn; + r = freq * (r >> scaleBits) + (r & mask) - start; + + // re-normalize + if (r < (Constants.RANS_BYTE_L_Nx16)){ + int i = 0xFF & byteBuffer.get(); + i |= (0xFF & byteBuffer.get())<<8; + r = (r << 16) + i; } return r; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java new file mode 100644 index 0000000000..244b1082ce --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -0,0 +1,49 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public abstract class RANSEncode { + private RANSEncodingSymbol[][] encodingSymbols; + + // Getter + public RANSEncodingSymbol[][] getEncodingSymbols() { + return encodingSymbols; + } + + abstract ByteBuffer compress(final ByteBuffer inBuffer, final T params); + + // Lazy initialization of working memory for the encoder + protected void initializeRANSEncoder() { + if (encodingSymbols == null) { + encodingSymbols = new RANSEncodingSymbol[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < encodingSymbols.length; i++) { + for (int j = 0; j < encodingSymbols[i].length; j++) { + encodingSymbols[i][j] = new RANSEncodingSymbol(); + } + } + } else { + for (int i = 0; i < encodingSymbols.length; i++) { + for (int j = 0; j < encodingSymbols[i].length; j++) { + encodingSymbols[i][j].reset(); + } + } + } + } + + protected ByteBuffer allocateOutputBuffer(final int inSize) { + // TODO: This should vary depending on the RANS type and order + // This calculation is identical to the one in samtools rANS_static.c + // Presumably the frequency table (always big enough for order 1) = 257*257, then * 3 for each entry + // (byte->symbol, 2 bytes -> scaled frequency), + 9 for the header (order byte, and 2 int lengths + // for compressed/uncompressed lengths) ? Plus additional 5% for..., for what ??? + final int compressedSize = (int) (1.05 * inSize + 257 * 257 * 3 + 9); + final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize); + if (outputBuffer.remaining() < compressedSize) { + throw new RuntimeException("Failed to allocate sufficient buffer size for RANS coder."); + } + outputBuffer.order(ByteOrder.LITTLE_ENDIAN); + return outputBuffer; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java index 2d70255416..e1e9b0b549 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java @@ -40,10 +40,10 @@ public void reset() { } public void set(final int start, final int freq, final int scaleBits) { - // RansAssert(scale_bits <= 16); RansAssert(start <= (1u << - // scale_bits)); RansAssert(freq <= (1u << scale_bits) - start); - xMax = ((Constants.RANS_BYTE_L >> scaleBits) << 8) * freq; + // Rans4x8: xMax = ((Constants.RANS_BYTE_L_4x8 >> scaleBits) << 8) * freq = (1<< 31-scaleBits) * freq + // RansNx16: xMax = ((Constants.RANS_BYTE_L_Nx16 >> scaleBits) << 16) * freq = (1<< 31-scaleBits) * freq + xMax = (1<< (31-scaleBits)) * freq; cmplFreq = (1 << scaleBits) - freq; if (freq < 2) { rcpFreq = (int) ~0L; @@ -56,7 +56,6 @@ public void set(final int start, final int freq, final int scaleBits) { while (freq > (1L << shift)) { shift++; } - rcpFreq = (int) (((1L << (shift + 31)) + freq - 1) / freq); rcpShift = shift - 1; @@ -64,7 +63,6 @@ public void set(final int start, final int freq, final int scaleBits) { // have bias=start. bias = start; } - rcpShift += 32; // Avoid the extra >>32 in RansEncPutSymbol } @@ -93,4 +91,32 @@ public int putSymbol(int r, final ByteBuffer byteBuffer) { r = (int) (x + bias + q * cmplFreq); return r; } -} + + public int putSymbolNx16(int r, final ByteBuffer byteBuffer) { + ValidationUtils.validateArg(xMax != 0, "can't encode symbol with freq=0"); + + // re-normalize + int x = r; + if (x >= xMax) { + byteBuffer.put((byte) ((x>>8) & 0xFF)); // extra line - 1 more byte + byteBuffer.put((byte) (x & 0xFF)); + x >>=16; + if (x >= xMax) { + byteBuffer.put((byte) ((x>>8) & 0xFF)); // extra line - 1 more byte + byteBuffer.put((byte) (x & 0xFF)); + x >>=16; + } + } + + // x = C(s,x) + // NOTE: written this way so we get a 32-bit "multiply high" when + // available. If you're on a 64-bit platform with cheap multiplies + // (e.g. x64), just bake the +32 into rcp_shift. + // int q = (int) (((uint64_t)x * sym.rcp_freq) >> 32) >> sym.rcp_shift; + + // The extra >>32 has already been added to RansEncSymbolInit + final long q = ((x * (0xFFFFFFFFL & rcpFreq)) >> rcpShift); + r = (int) (x + bias + q * cmplFreq); + return r; + } +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java new file mode 100644 index 0000000000..82dda96c20 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java @@ -0,0 +1,69 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class RANSNx16Decode extends RANSDecode{ + private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + + public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params params) { + if (inBuffer.remaining() == 0) { + return EMPTY_BUFFER; + } + + // For RANS decoding, the bytes are read in little endian from the input stream + inBuffer.order(ByteOrder.LITTLE_ENDIAN); + initializeRANSDecoder(); + + // the first byte of compressed stream gives the formatFlags + final int formatFlags = inBuffer.get(); + params.setFormatFlags(formatFlags); + int n_out = params.getnOut(); + final RANSParams.ORDER order = params.getOrder(); // Order-0 or Order-1 entropy coding + final boolean x32 = params.getX32(); // Interleave N = 32 rANS states (else N = 4) + final boolean stripe = params.getStripe(); //multiway interleaving of byte streams + final boolean nosz = params.getNosz(); // original size is not recorded + final boolean cat = params.getCAT(); // Data is uncompressed + final boolean rle = params.getRLE(); // Run length encoding, with runs and literals encoded separately + final boolean pack = params.getPack(); // Pack 2, 4, 8 or infinite symbols per byte + + // TODO: add methods to handle various flags + + // N-way interleaving. If the NWay flag is set, use 32 way interleaving, else use 4 way + final int Nway = (x32) ? 32 : 4; + + // if nosz is set, then uncompressed size is not recorded. + if (!nosz) { + n_out = Utils.readUint7(inBuffer); + } + ByteBuffer outBuffer = ByteBuffer.allocate(n_out); + + // If CAT is set then, the input is uncompressed + if (cat){ + byte[] data = new byte[n_out]; + outBuffer = inBuffer.get( data,0, n_out); + } + else { + switch (order){ + case ZERO: + outBuffer = uncompressOrder0WayN(inBuffer,outBuffer, n_out,Nway); + break; +// case ONE: +// uncompressOrder1WayN(inBuffer,n_out, Nway); +// break; + default: + throw new RuntimeException("Unknown rANS order: " + order); + } + } + return outBuffer; + } + + private ByteBuffer uncompressOrder0WayN(final ByteBuffer inBuffer, final ByteBuffer outBuffer,final int n_out,final int Nway) { + // read the frequency table, get the normalised frequencies and use it to set the RANSDecodingSymbols + FrequenciesNx16.readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); + // uncompress using Nway rans states + D0N.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer,n_out,Nway); + return outBuffer; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java new file mode 100644 index 0000000000..8c927f3c04 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java @@ -0,0 +1,91 @@ +package htsjdk.samtools.cram.compression.rans; + +import java.nio.ByteBuffer; + +public class RANSNx16Encode extends RANSEncode{ + private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final int MINIMUM__ORDER_1_SIZE = 4; + + public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params params) { + if (inBuffer.remaining() == 0) { + return EMPTY_BUFFER; + } + final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); + final int formatFlags = params.getFormatFlags(); + outBuffer.put((byte) (formatFlags)); // one byte for formatFlags + final RANSParams.ORDER order = params.getOrder(); // Order-0 or Order-1 entropy coding + final boolean x32 = params.getX32(); // Interleave N = 32 rANS states (else N = 4) + final boolean stripe = params.getStripe(); //multiway interleaving of byte streams + final boolean nosz = params.getNosz(); // original size is not recorded + final boolean cat = params.getCAT(); // Data is uncompressed + final boolean rle = params.getRLE(); // Run length encoding, with runs and literals encoded separately + final boolean pack = params.getPack(); // Pack 2, 4, 8 or infinite symbols per byte + + // TODO: add methods to handle various flags + + // N-way interleaving + final int Nway = (x32) ? 32 : 4; + + //stripe size + final int N = formatFlags>>8; + + if (!nosz) { + int insize = inBuffer.remaining(); + Utils.writeUint7(insize,outBuffer); + } + initializeRANSEncoder(); + if (cat) { + outBuffer.put(inBuffer); + return outBuffer; + } + + if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { + // TODO: check if this still applies for Nx16 or if there is a different limit + // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 + return compressOrder0WayN(inBuffer, Nway, outBuffer); + } + + switch (order) { + case ZERO: + return compressOrder0WayN(inBuffer, Nway, outBuffer); +// case ONE: +// return compressOrder1WayN(inBuffer, Nway, outBuffer); + default: + throw new RuntimeException("Unknown rANS order: " + order); + } + } + + private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, final ByteBuffer outBuffer) { + final int inSize = inBuffer.remaining(); + final int[] F = FrequenciesNx16.buildFrequenciesOrder0(inBuffer); + final ByteBuffer cp = outBuffer.slice(); + int bitSize = (int) Math.ceil(Math.log(inSize) / Math.log(2)); + if (bitSize == 0) { + // TODO: check this! + // If there is just one symbol, bitsize = log (1)/log(2) = 0. + bitSize = 1; + } + if (bitSize > 12) { + bitSize = 12; + } + final int prefix_size = outBuffer.position(); + + // Normalize Frequencies such that sum of Frequencies = 1 << bitsize + FrequenciesNx16.normaliseFrequenciesOrder0(F, bitSize); + + // Write the Frequency table. Keep track of the size for later + final int frequencyTableSize = FrequenciesNx16.writeFrequenciesOrder0(cp, F); + + // Normalize Frequencies such that sum of Frequencies = 1 << 12 + FrequenciesNx16.normaliseFrequenciesOrder0(F, 12); + + // update the RANS Encoding Symbols + FrequenciesNx16.buildSymsOrder0(F, getEncodingSymbols()[0]); + inBuffer.rewind(); + final int compressedBlobSize = E0N.compress(inBuffer, getEncodingSymbols()[0], cp, Nway); + outBuffer.rewind(); // set position to 0 + outBuffer.limit(prefix_size + frequencyTableSize + compressedBlobSize); + return outBuffer; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Params.java new file mode 100644 index 0000000000..16d6cd42ef --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Params.java @@ -0,0 +1,80 @@ +package htsjdk.samtools.cram.compression.rans; + +public class RANSNx16Params implements RANSParams{ + + // format is the first byte of the compressed data stream, + // which consists of all the bit-flags detailing the type of transformations + // and entropy encoders to be combined + private int formatFlags; + + // To get the least significant 7 bits of format byte + private static final int FORMAT_FLAG_MASK = 0x7f; + + // RANS Nx16 Bit Flags + private static final int ORDER_FLAG_MASK = 0x01; + private static final int X32_FLAG_MASK = 0x04; + private static final int STRIPE_FLAG_MASK = 0x08; + private static final int NOSZ_FLAG_MASK = 0x10; + private static final int CAT_FLAG_MASK = 0x20; + private static final int RLE_FLAG_MASK = 0x40; + private static final int PACK_FLAG_MASK = 0x80; + + // output length. Used as input param to RANS Nx16 uncompress method + private final int nOut = 0; + + public RANSNx16Params(int formatFlags) { + this.formatFlags = formatFlags; + } + + @Override + public ORDER getOrder() { + // Rans Order ZERO or ONE encoding + return ORDER.fromInt(formatFlags & ORDER_FLAG_MASK); //convert into order type + } + + protected int getFormatFlags(){ + // Least significant 7 bits of the format + return formatFlags & FORMAT_FLAG_MASK; + } + + public void setFormatFlags(int formatFlags) { + this.formatFlags = formatFlags; + } + + protected boolean getX32(){ + // Interleave N = 32 rANS states (else N = 4) + return ((formatFlags & X32_FLAG_MASK)!=0); + } + + protected boolean getStripe(){ + // multiway interleaving of byte streams + return ((formatFlags & STRIPE_FLAG_MASK)!=0); + } + + protected boolean getNosz(){ + // original size is not recorded (for use by Stripe) + return ((formatFlags & NOSZ_FLAG_MASK)!=0); + } + + protected boolean getCAT(){ + // Data is uncompressed + return ((formatFlags & CAT_FLAG_MASK)!=0); + } + + protected boolean getRLE(){ + // Run length encoding, with runs and literals encoded separately + return ((formatFlags & RLE_FLAG_MASK)!=0); + } + + protected boolean getPack(){ + // Pack 2, 4, 8 or infinite symbols per byte + return ((formatFlags & PACK_FLAG_MASK)!=0); + } + + public int getnOut() { + // nOut is the length of uncompressed data + // used in uncompress method + return nOut; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java new file mode 100644 index 0000000000..ba87594e4e --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java @@ -0,0 +1,19 @@ +package htsjdk.samtools.cram.compression.rans; + +public interface RANSParams { + + enum ORDER { + ZERO, ONE; + + public static ORDER fromInt(final int orderValue) { + try { + return ORDER.values()[orderValue]; + } catch (final ArrayIndexOutOfBoundsException e) { + throw new IllegalArgumentException("Unknown rANS order: " + orderValue); + } + } + } + + ORDER getOrder(); + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index d2da830eb5..f8066ad6d0 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -39,14 +39,52 @@ static int RANSDecodeGet(final int r, final int scaleBits) { } // Re-normalize. - static int RANSDecodeRenormalize(int r, final ByteBuffer byteBuffer) { + static int RANSDecodeRenormalize4x8(int r, final ByteBuffer byteBuffer) { // re-normalize - if (r < Constants.RANS_BYTE_L) { + + //rans4x8 + if (r < Constants.RANS_BYTE_L_4x8) { do { r = (r << 8) | (0xFF & byteBuffer.get()); - } while (r < Constants.RANS_BYTE_L); + } while (r < Constants.RANS_BYTE_L_4x8); } + return r; + } + static int RANSDecodeRenormalizeNx16(int r, final ByteBuffer byteBuffer) { + // ransNx16 + if (r < (Constants.RANS_BYTE_L_Nx16)) { + int i = (0xFF & byteBuffer.get()); + i |= (0xFF & byteBuffer.get()) <<8; + + r = (r << 16) | i; + } return r; } + + public static void writeUint7(int i, ByteBuffer cp){ + int s = 0; + int X = i; + do { + s += 7; + X >>= 7; + }while (X>0); + do { + s -=7; + //writeByte + int s_ = (s > 0)?1:0; + cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); + } while (s>0); + } + + public static int readUint7(ByteBuffer cp){ + int i = 0; + int c; + do { + //read byte + c = cp.get(); + i = (i<<7) | (c & 0x7f); + } while((c & 0x80)!=0); + return i; + } } diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java index 638089c5e0..0b3137a473 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java @@ -26,7 +26,7 @@ import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.ExternalCompressor; -import htsjdk.samtools.cram.compression.rans.RANS; +import htsjdk.samtools.cram.compression.rans.RANS4x8Params; import htsjdk.samtools.cram.encoding.CRAMEncoding; import htsjdk.samtools.cram.encoding.external.ByteArrayStopEncoding; import htsjdk.samtools.cram.encoding.external.ExternalByteEncoding; @@ -38,10 +38,20 @@ import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import htsjdk.utils.ValidationUtils; -import java.io.*; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.nio.ByteBuffer; -import java.util.*; import htsjdk.samtools.util.Log; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; /** * Maintains a map of DataSeries to EncodingDescriptor, and a second map that contains the compressor to use @@ -278,12 +288,12 @@ public ExternalCompressor getBestExternalCompressor(final byte[] data, final CRA final ExternalCompressor rans0 = compressorCache.getCompressorForMethod( BlockCompressionMethod.RANS, - RANS.ORDER.ZERO.ordinal()); + RANS4x8Params.ORDER.ZERO.ordinal()); final int rans0Len = rans0.compress(data).length; final ExternalCompressor rans1 = compressorCache.getCompressorForMethod( BlockCompressionMethod.RANS, - RANS.ORDER.ONE.ordinal()); + RANS4x8Params.ORDER.ONE.ordinal()); final int rans1Len = rans1.compress(data).length; // find the best of general purpose codecs: @@ -387,14 +397,14 @@ private void putExternalGzipEncoding(final CRAMEncodingStrategy encodingStrategy private void putExternalRansOrderOneEncoding(final DataSeries dataSeries) { putExternalEncoding( dataSeries, - compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS.ORDER.ONE.ordinal())); + compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ONE.ordinal())); } // add an external encoding appropriate for the dataSeries value type, with a RANS order 0 compressor private void putExternalRansOrderZeroEncoding(final DataSeries dataSeries) { putExternalEncoding( dataSeries, - compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS.ORDER.ZERO.ordinal())); + compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ZERO.ordinal())); } @Override @@ -415,4 +425,4 @@ public int hashCode() { return result; } -} +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java index 7021664be3..9026daa004 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java @@ -24,8 +24,9 @@ */ package htsjdk.samtools.cram.structure; -import htsjdk.samtools.cram.compression.*; -import htsjdk.samtools.cram.compression.rans.RANS; +import htsjdk.samtools.cram.compression.ExternalCompressor; +import htsjdk.samtools.cram.compression.RANSExternalCompressor; +import htsjdk.samtools.cram.compression.rans.*; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import htsjdk.samtools.util.Tuple; import htsjdk.utils.ValidationUtils; @@ -40,7 +41,8 @@ public class CompressorCache { private final String argErrorMessage = "Invalid compression arg (%d) requested for CRAM %s compressor"; private final HashMap, ExternalCompressor> compressorCache = new HashMap<>(); - private RANS sharedRANS; + private RANS4x8Encode sharedRANSEncode; + private RANS4x8Decode sharedRANSDecode; /** * Return a compressor if its in our cache, otherwise spin one up and cache it and return it. @@ -67,18 +69,21 @@ public ExternalCompressor getCompressorForMethod( // for efficiency, we want to share the same underlying RANS object with both order-0 and // order-1 ExternalCompressors final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG ? - RANS.ORDER.ZERO.ordinal() : + RANS4x8Params.ORDER.ZERO.ordinal() : compressorSpecificArg; final Tuple compressorTuple = new Tuple<>( BlockCompressionMethod.RANS, ransArg); if (!compressorCache.containsKey(compressorTuple)) { - if (sharedRANS == null) { - sharedRANS = new RANS(); + if (sharedRANSEncode == null) { + sharedRANSEncode = new RANS4x8Encode(); + } + if (sharedRANSDecode == null) { + sharedRANSDecode = new RANS4x8Decode(); } compressorCache.put( new Tuple(BlockCompressionMethod.RANS, ransArg), - new RANSExternalCompressor(ransArg, sharedRANS) + new RANSExternalCompressor(ransArg, sharedRANSEncode, sharedRANSDecode) ); } return getCachedCompressorForMethod(compressorTuple.a, compressorTuple.b); diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java new file mode 100644 index 0000000000..2283650256 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java @@ -0,0 +1,45 @@ +package htsjdk.samtools.cram; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * HTSCodecs test data is kept in a separate repository, currently at https://github.com/jkbonfield/htscodecs-corpus + * so it can be shared across htslib/samtools/htsjdk. + */ +public class CRAMCodecCorpus { + public static final String HTSCODECS_TEST_DATA_ENV = "HTSCODECS_TEST_DATA"; + + /** + * @return true if htscodecs test data is available, otherwise false + */ + public static boolean isHtsCodecsTestDataAvailable() { + final Path testDataPath = getHTSCodecsTestDataLocation(); + return Files.exists(testDataPath); + } + + /** + * @return throws a RuntimeException if the htscodecs test data repo is not available locally + */ + public static void assertHTSCodecsTestDataAvailable() { + if (!isHtsCodecsTestDataAvailable()) { + throw new RuntimeException( + String.format( + "No HTS codecs test data found." + + " The %s environment variable must be set to the location of the local hts codecs test data.", + HTSCODECS_TEST_DATA_ENV)); + } + } + + /** + * @return the name and location of the local hts codecs test data as specified by the environment + * variable HTSCODECS_TEST_DATA, or the default value of "../htscodecs-corpus" if the environment + * variable is not set + */ + public static Path getHTSCodecsTestDataLocation() { + final String htsCodecsTestLocation = System.getenv(HTSCODECS_TEST_DATA_ENV); + return htsCodecsTestLocation == null ? Paths.get("../htscodecs/tests") : Paths.get(htsCodecsTestLocation); + } + +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java new file mode 100644 index 0000000000..b952bdcd55 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -0,0 +1,247 @@ +package htsjdk.samtools.cram; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.compression.rans.*; +import org.apache.commons.compress.utils.IOUtils; +import org.testng.Assert; +import org.testng.SkipException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +/** + * HTSCodecs test data is kept in a separate repository, currently at https://github.com/jkbonfield/htscodecs-corpus + * so it can be shared across htslib/samtools/htsjdk. + */ +public class CRAMCodecCorpusTest extends HtsjdkTest { + @Test + public void testGetHTSCodecsCorpus() { + if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { + throw new SkipException(String.format( + "No HTS codecs test data found." + + " The %s environment variable must be set to the location of the local hts codecs test data.", + CRAMCodecCorpus.HTSCODECS_TEST_DATA_ENV)); + } + } + + ///////////////////////////////////////////////////////////////////////////////////////////////// + // RANS tests + ///////////////////////////////////////////////////////////////////////////////////////////////// + + //TODO: the TestDataProviders tests fail if the hts codecs corpus isn't available because + + @DataProvider(name = "rans4x8") + public Object[][] getRANS4x8TestData() throws IOException { + // cache/reuse this for each test case to eliminate excessive garbage collection + final RANS4x8Encode rans4x8Encode = new RANS4x8Encode(); + final RANS4x8Decode rans4x8Decode = new RANS4x8Decode(); + final RANS4x8Params params0 = new RANS4x8Params(RANSParams.ORDER.ZERO); // RANS 4x8 order 0 + final RANS4x8Params params1 = new RANS4x8Params(RANSParams.ORDER.ONE); // RANS 4x8 order 1 + final List testCases = new ArrayList<>(); + getHtsCodecRANSTestFiles().stream() + .forEach(p -> + { + testCases.add(new Object[] {p, rans4x8Encode , rans4x8Decode, params0 }); + testCases.add(new Object[] {p, rans4x8Encode , rans4x8Decode, params1 }); + }); + return testCases.toArray(new Object[][]{}); + } + + @DataProvider(name = "ransNx16") + public Object[][] getRANS4x16TestData() throws IOException { + final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); + final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); + final RANSNx16Params params0 = new RANSNx16Params(0); // RANS Nx16 order 0, none of the bit flags are set + final List testCases = new ArrayList<>(); + getHtsCodecRANSTestFiles().stream() + .forEach(p -> + { + testCases.add(new Object[] {p, ransNx16Encode, ransNx16Decode , params0}); + }); + return testCases.toArray(new Object[][]{}); + } + + @Test ( + dataProvider = "rans4x8", + dependsOnMethods = "testGetHTSCodecsCorpus", + description = "Roundtrip using htsjdk RANS4x8." + + " Compare the output with the original file" ) + public void testRANSRoundTrip4x8( + final Path inputTestDataPath, + final RANS4x8Encode ransEncode, + final RANS4x8Decode ransDecode, + final RANS4x8Params params) throws IOException { + if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { + throw new SkipException("htscodecs test data is not available locally"); + } + try (final InputStream is = Files.newInputStream(inputTestDataPath)) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) + // by filtering out the embedded newlines, and then round trip through RANS and compare the + // results + + final ByteBuffer uncompressedBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(is))); + final ByteBuffer compressedBytes = ransEncode.compress(uncompressedBytes, params); + uncompressedBytes.rewind(); + System.out.println(String.format("%s RANS4x16 Order (%s) Uncompressed: (%,d) Compressed: (%,d)", + inputTestDataPath.getFileName(), + params.getOrder(), + uncompressedBytes.remaining(), + compressedBytes.remaining())); + Assert.assertEquals(ransDecode.uncompress(compressedBytes, params), uncompressedBytes); + } + } + + @Test ( + dataProvider = "rans4x8", + dependsOnMethods = "testGetHTSCodecsCorpus", + description = "Compress the original file using htsjdk RANS4x8 and compare it with the existing compressed file. " + + "Uncompress the existing compressed file using htsjdk RANS4x8 and compare it with the original file.") + public void testRANSPreCompressed4x8( + final Path inputTestDataPath, + final RANS4x8Encode ransEncode, + final RANS4x8Decode ransDecode, + final RANS4x8Params params ) throws IOException { + if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { + throw new SkipException("htscodecs test data is not available locally"); + } + + final Path preCompressedDataPath = getCompressedRANSPath("4x8",inputTestDataPath, params.getOrder().ordinal()); + try (final InputStream inputStream = Files.newInputStream(inputTestDataPath); + final InputStream preCompressedInputStream = Files.newInputStream(preCompressedDataPath); + ) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) + // by filtering out the embedded newlines, and then round trip through RANS and compare the + // results + final ByteBuffer inputBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(inputStream))); + + final ByteBuffer preCompressedInputBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInputStream)); + + // Use htsjdk to compress the input file from htscodecs repo + final ByteBuffer htsjdkCompressedBytes = ransEncode.compress(inputBytes, params); + inputBytes.rewind(); + + // Compare the htsjdk compressed bytes with the precompressed file from htscodecs repo + Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); + + // Use htsjdk to uncompress the precompressed file from htscodecs repo + final ByteBuffer htsjdkUncompressedBytes = ransDecode.uncompress(preCompressedInputBytes, params); + + // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo + Assert.assertEquals(htsjdkUncompressedBytes, inputBytes); + } + } + + @Test ( + dataProvider = "ransNx16", + dependsOnMethods = "testGetHTSCodecsCorpus", + description = "Roundtrip the original file using RANSNx16 htsjdk." + + " Compare the output with the original file" ) + public void testRANSRoundTripNx16( + final Path inputTestDataPath, + final RANSNx16Encode ransEncode, + final RANSNx16Decode ransDecode, + final RANSNx16Params params) throws IOException { + if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { + throw new SkipException("htscodecs test data is not available locally"); + } + + try (final InputStream is = Files.newInputStream(inputTestDataPath)) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) + // by filtering out the embedded newlines, and then round trip through RANS and compare the + // results + + final ByteBuffer uncompressedBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(is))); + final ByteBuffer compressedBytes = ransEncode.compress(uncompressedBytes, params); + uncompressedBytes.rewind(); + System.out.println(String.format("%s RANS4x16 Order (%s) Uncompressed: (%,d) Compressed: (%,d)", + inputTestDataPath.getFileName(), + params.getOrder(), + uncompressedBytes.remaining(), + compressedBytes.remaining())); + Assert.assertEquals(ransDecode.uncompress(compressedBytes,params), uncompressedBytes); + } + } + + @Test ( + dataProvider = "ransNx16", + dependsOnMethods = "testGetHTSCodecsCorpus", + description = "Compress the original file using htsjdk RANSNx16 and compare it with the existing compressed file. " + + "Uncompress the existing compressed file using htsjdk RANSNx16 and compare it with the original file.") + public void testRANSPreCompressedNx16( + final Path inputTestDataPath, + final RANSNx16Encode ransEncode, + final RANSNx16Decode ransDecode, + final RANSNx16Params params) throws IOException { + if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { + throw new SkipException("htscodecs test data is not available locally"); + } + + final Path preCompressedDataPath = getCompressedRANSPath("4x16",inputTestDataPath, params.getOrder().ordinal()); + try (final InputStream inputStream = Files.newInputStream(inputTestDataPath); + final InputStream preCompressedInputStream = Files.newInputStream(preCompressedDataPath); + ) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) + // by filtering out the embedded newlines, and then round trip through RANS and compare the + // results + final ByteBuffer inputBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(inputStream))); + + final ByteBuffer preCompressedInputBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInputStream)); + + // Use htsjdk to compress the input file from htscodecs repo + final ByteBuffer htsjdkCompressedBytes = ransEncode.compress(inputBytes, params); + inputBytes.rewind(); + + // Compare the htsjdk compressed bytes with the precompressed file from htscodecs repo + Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); + + // Use htsjdk to uncompress the precompressed file from htscodecs repo + final ByteBuffer htsjdkUncompressedBytes = ransDecode.uncompress(preCompressedInputBytes, params); + + // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo + Assert.assertEquals(htsjdkUncompressedBytes, inputBytes); + } + } + + // return a list of all RANS test data files in the htscodecs test directory + private List getHtsCodecRANSTestFiles() throws IOException { + CRAMCodecCorpus.assertHTSCodecsTestDataAvailable(); + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMCodecCorpus.getHTSCodecsTestDataLocation().resolve("dat"), + path -> path.getFileName().startsWith("q4") || + path.getFileName().startsWith("q8") || + path.getFileName().startsWith("qvar")) + // q40+dir is excluded because the uncompressed size in the compressed file prefix does not match + // the original file size. + // Q: why isn't q40+dir not included as it also startswith q4? + .forEach(path -> paths.add(path)); + return paths; + } + + // the input files have embedded newlines that the test remove before round-tripping... + final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + for (final byte b : rawBytes) { + if (b != '\n') { + baos.write(b); + } + } + return baos.toByteArray(); + } + } + + // Given a test file name, map it to the corresponding rans compressed path + final Path getCompressedRANSPath(final String ransType,final Path inputTestDataPath, int order) { + final String compressedFileName = String.format("r%s/%s.%s", ransType, inputTestDataPath.getFileName(), order); + return inputTestDataPath.getParent().resolve(compressedFileName); + } + +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/build/SliceFactoryTest.java b/src/test/java/htsjdk/samtools/cram/build/SliceFactoryTest.java index cee032bd6c..72316c47c9 100644 --- a/src/test/java/htsjdk/samtools/cram/build/SliceFactoryTest.java +++ b/src/test/java/htsjdk/samtools/cram/build/SliceFactoryTest.java @@ -4,7 +4,6 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.cram.CRAMException; -import htsjdk.samtools.cram.compression.rans.RANS; import htsjdk.samtools.cram.ref.ReferenceContext; import htsjdk.samtools.cram.structure.CRAMEncodingStrategy; import htsjdk.samtools.cram.structure.CRAMStructureTestHelper; diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 9c5b7c5752..5e7fd621ef 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -2,13 +2,16 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.util.TestUtil; +import htsjdk.utils.TestNGUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.Random; import java.util.function.BiFunction; +import java.util.stream.Stream; /** * Created by vadim on 22/04/2015. @@ -28,7 +31,6 @@ public String toString() { } } - @DataProvider(name="ransData") public Object[][] getRansTestData() { return new Object[][] { { new TestCaseWrapper(new byte[]{}) }, @@ -49,107 +51,199 @@ public Object[][] getRansTestData() { }; } - @Test(dataProvider="ransData") - public void testRANS(final TestCaseWrapper tc) { - roundTripForEachOrder(tc.testArray); + @DataProvider(name="rans4x8") + public Object[][] getRans4x8Codecs() { + final RANS4x8Encode rans4x8Encode = new RANS4x8Encode(); + final RANS4x8Decode rans4x8Decode = new RANS4x8Decode(); + final RANS4x8Params rans4x8ParamsOrder0 = new RANS4x8Params(RANSParams.ORDER.ZERO); // RANS4x8 Order 0 + final RANS4x8Params rans4x8ParamsOrder1 = new RANS4x8Params(RANSParams.ORDER.ONE); // RANS4x8 Order 1 + return new Object[][]{ + {rans4x8Encode, rans4x8Decode, rans4x8ParamsOrder0}, + {rans4x8Encode, rans4x8Decode, rans4x8ParamsOrder1} + }; + } + + @DataProvider(name="ransNx16") + public Object[][] getRansNx16Codecs() { + final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); + final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); + final RANSNx16Params ransNx16ParamsFormatFlags0 = new RANSNx16Params(0); //RANSNx16 formatFlags(first byte) 0 + // TODO: More formatFlags values i.e, combinations of bit flags will be added later + return new Object[][]{ + {ransNx16Encode, ransNx16Decode, ransNx16ParamsFormatFlags0} + }; + } + + @DataProvider(name = "allRansCodecs") + public Object[][] getAllRansCodecs() { + // concatenate RANS4x8 and RANSNx16 codecs + return Stream.concat(Arrays.stream(getRans4x8Codecs()), Arrays.stream(getRansNx16Codecs())) + .toArray(Object[][]::new); + } + + @DataProvider(name="rans4x8AndData") + public Object[][] getRans4x8AndData() { + // this data provider provides all the testdata for RANS4x8 order 0 and order 1 + return TestNGUtils.cartesianProduct(getRansTestData(), getRans4x8Codecs()); } - @Test - public void testSizeRangeTiny() { + @DataProvider(name="ransNx16AndData") + public Object[][] getRansNx16AndData() { + // this data provider provides all the testdata for RANSNx16 formatFlags = 0 + return TestNGUtils.cartesianProduct(getRansTestData(), getRansNx16Codecs()); + } + + @Test(dataProvider = "allRansCodecs") + public void testSizeRangeTiny( + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params) { for (int i = 0; i < 20; i++) { final byte[] data = randomBytesFromGeometricDistribution(100, 0.1); final ByteBuffer in = ByteBuffer.wrap(data); for (int size = 1; size < data.length; size++) { in.position(0); in.limit(size); - roundTripForEachOrder(in); + ransRoundTrip(in, ransEncode, ransDecode, params); } } } - @Test - public void testSizeRangeSmall() { + @Test(dataProvider = "allRansCodecs") + public void testSizeRangeSmall( + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params) { final byte[] data = randomBytesFromGeometricDistribution(1000, 0.01); final ByteBuffer in = ByteBuffer.wrap(data); for (int size = 4; size < data.length; size++) { in.position(0); in.limit(size); - roundTripForEachOrder(in); + ransRoundTrip(in, ransEncode, ransDecode, params); } } - @Test - public void testLargeSize() { + @Test(dataProvider = "allRansCodecs") + public void testLargeSize( + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params) { final int size = 100 * 1000 + 3; final byte[] data = randomBytesFromGeometricDistribution(size, 0.01); final ByteBuffer in = ByteBuffer.wrap(data); for (int limit = size - 4; limit < size; limit++) { in.position(0); in.limit(limit); - roundTripForEachOrder(in); + ransRoundTrip(in, ransEncode, ransDecode, params); } } - @Test - public void testBuffersMeetBoundaryExpectations() { + @Test(dataProvider = "rans4x8") + public void testRans4x8BuffersMeetBoundaryExpectations( + final RANS4x8Encode ransEncode, + final RANS4x8Decode ransDecode, + final RANS4x8Params params) { + final int size = 1001; + final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); + final ByteBuffer compressed = ransEncode.compress(raw, params); + Assert.assertFalse(raw.hasRemaining()); + Assert.assertEquals(raw.limit(), size); + Assert.assertEquals(compressed.position(), 0); + Assert.assertTrue(compressed.limit() > 10); + Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); + Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4); + Assert.assertEquals(compressed.getInt(), size); + compressed.rewind(); + + final ByteBuffer uncompressed = ransDecode.uncompress(compressed,params); + Assert.assertFalse(compressed.hasRemaining()); + Assert.assertEquals(uncompressed.limit(), size); + Assert.assertEquals(uncompressed.position(), 0); + } + + @Test(dataProvider = "ransNx16") + public void testRansNx16BuffersMeetBoundaryExpectations( + final RANSNx16Encode ransEncode, + final RANSNx16Decode ransDecode, + final RANSNx16Params params) { final int size = 1001; final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final RANS rans = new RANS(); - for (RANS.ORDER order : RANS.ORDER.values()) { - final ByteBuffer compressed = rans.compress(raw, order); - Assert.assertFalse(raw.hasRemaining()); - Assert.assertEquals(raw.limit(), size); - - Assert.assertEquals(compressed.position(), 0); - Assert.assertTrue(compressed.limit() > 10); - Assert.assertEquals(compressed.get(), (byte) order.ordinal()); - Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4); - Assert.assertEquals(compressed.getInt(), size); - compressed.rewind(); - - final ByteBuffer uncompressed = rans.uncompress(compressed); - Assert.assertFalse(compressed.hasRemaining()); - Assert.assertEquals(uncompressed.limit(), size); - Assert.assertEquals(uncompressed.position(), 0); - - raw.rewind(); + final ByteBuffer compressed = ransEncode.compress(raw, params); + Assert.assertFalse(raw.hasRemaining()); + Assert.assertEquals(raw.limit(), size); + Assert.assertEquals(compressed.position(), 0); + Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty + final int FormatFlags = compressed.get(); // first byte of compressed data is the formatFlags + Assert.assertEquals(FormatFlags,params.getFormatFlags()); + // if nosz flag is not set, then the uncompressed size is recorded + if (!params.getNosz()){ + Assert.assertEquals(Utils.readUint7(compressed), size); } + compressed.rewind(); + + final ByteBuffer uncompressed = ransDecode.uncompress(compressed,params); + Assert.assertFalse(compressed.hasRemaining()); + Assert.assertEquals(uncompressed.limit(), size); + Assert.assertEquals(uncompressed.position(), 0); } - @Test - public void testRansHeader() { - final byte[] data = randomBytesFromGeometricDistribution(1000, 0.01); - final ByteBuffer compressed = new RANS().compress(ByteBuffer.wrap(data), RANS.ORDER.ZERO); - Assert.assertEquals(compressed.get(), (byte) 0); + @Test(dataProvider = "rans4x8") + public void testRans4x8Header( + final RANS4x8Encode ransEncode, + final RANS4x8Decode ransDecode, + final RANS4x8Params params) { + final int size = 1000; + final ByteBuffer data = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); + final ByteBuffer compressed = ransEncode.compress(data, params); + // first byte of compressed data gives the order + Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); + // the next 4 bytes gives the compressed size Assert.assertEquals(compressed.getInt(), compressed.limit() - 9); - Assert.assertEquals(compressed.getInt(), data.length); + // the next 4 bytes gives the uncompressed size + Assert.assertEquals(compressed.getInt(), data.limit()); } - private byte[] getNBytesWithValues(final int n, final BiFunction valueForIndex) { - final byte[] data = new byte[n]; - for (int i = 0; i < data.length; i++) { - data[i] = valueForIndex.apply(n, i); + @Test(dataProvider = "ransNx16") + public void testRansNx16Header( + final RANSNx16Encode ransEncode, + final RANSNx16Decode ransDecode, + final RANSNx16Params params) { + final int size = 1000; + final ByteBuffer data = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); + final ByteBuffer compressed = ransEncode.compress(data, params); + // first byte of compressed data gives the formatFlags + Assert.assertEquals(compressed.get(), (byte) params.getFormatFlags()); + // if nosz flag is not set, then the uncompressed size is recorded + if (!params.getNosz()){ + Assert.assertEquals(Utils.readUint7(compressed), size); } - return data; } - private static void roundTripForEachOrder(final ByteBuffer data) { - for (RANS.ORDER order : RANS.ORDER.values()) { - roundTripForOrder(data, order); - data.rewind(); - } + @Test(dataProvider="rans4x8AndData") + public void testRANS4x8( + final TestCaseWrapper tc, + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params) { + ransRoundTrip(ByteBuffer.wrap(tc.testArray), ransEncode, ransDecode, params); } - private static void roundTripForEachOrder(final byte[] data) { - for (RANS.ORDER order : RANS.ORDER.values()) { - roundTripForOrder(data, order); - } + @Test(dataProvider="ransNx16AndData") + public void testRANSNx16( + final TestCaseWrapper tc, + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params) { + ransRoundTrip(ByteBuffer.wrap(tc.testArray), ransEncode, ransDecode, params); } - private static void roundTripForOrder(final ByteBuffer data, final RANS.ORDER order) { - final RANS rans = new RANS(); - final ByteBuffer compressed = rans.compress(data, order); - final ByteBuffer uncompressed = rans.uncompress(compressed); + private static void ransRoundTrip( + final ByteBuffer data, + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params) { + final ByteBuffer compressed = ransEncode.compress(data, params); + final ByteBuffer uncompressed = ransDecode.uncompress(compressed, params); data.rewind(); while (data.hasRemaining()) { if (!uncompressed.hasRemaining()) { @@ -160,8 +254,12 @@ private static void roundTripForOrder(final ByteBuffer data, final RANS.ORDER or Assert.assertFalse(uncompressed.hasRemaining()); } - private static void roundTripForOrder(final byte[] data, final RANS.ORDER order) { - roundTripForOrder(ByteBuffer.wrap(data), order); + private byte[] getNBytesWithValues(final int n, final BiFunction valueForIndex) { + final byte[] data = new byte[n]; + for (int i = 0; i < data.length; i++) { + data[i] = valueForIndex.apply(n, i); + } + return data; } private byte[] randomBytesFromGeometricDistribution(final int size, final double p) { @@ -185,4 +283,5 @@ private byte drawByteFromGeometricDistribution(final double probability) { final double g = Math.ceil(Math.log(1 - rand) / Math.log(1 - probability)) - 1; return (byte) g; } -} + +} \ No newline at end of file From defc1748cc952013450c4033bd0260748f53f44f Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 1 Mar 2022 14:12:22 -0500 Subject: [PATCH 03/76] Add Frequency methods to encode and decode classes --- .../cram/compression/rans/FrequencyUtils.java | 55 ++++ .../cram/compression/rans/RANS4x8Decode.java | 90 +++++- .../cram/compression/rans/RANS4x8Encode.java | 256 +++++++++++++++++- .../cram/compression/rans/RANSNx16Decode.java | 62 ++++- .../cram/compression/rans/RANSNx16Encode.java | 96 ++++++- 5 files changed, 545 insertions(+), 14 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java new file mode 100644 index 0000000000..63c43ee669 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java @@ -0,0 +1,55 @@ +package htsjdk.samtools.cram.compression.rans; + +public class FrequencyUtils { + static int[] normaliseFrequenciesOrder0(final int[] F, final int bits) { + // Returns an array of normalised Frequencies, + // such that the frequencies add up to 1<> 31)) == 0) { + + // A non-zero symbol frequency should not be incorrectly set to 0. + // If the calculated value is 0, change it to 1 + F[j] = 1; + } + fsum += F[j]; + } + + // adjust the frequency of the symbol "M" such that + // the sum of frequencies of all the symbols = renormFreq + if (fsum < renormFreq) { + F[M] += renormFreq - fsum; + } else if (fsum > renormFreq){ + F[M] -= fsum - renormFreq; + } + return F; + } +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java index 24515c919f..87d36bdc59 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java @@ -2,6 +2,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Arrays; public class RANS4x8Decode extends RANSDecode { @@ -43,7 +44,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANS4x8Params para private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols - Frequencies4x8.readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); + readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); D04.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer); return outBuffer; @@ -51,9 +52,94 @@ private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuf private ByteBuffer uncompressOrder1Way4(final ByteBuffer in, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols - Frequencies4x8.readStatsOrder1(in, getD(), getDecodingSymbols()); + readStatsOrder1(in, getD(), getDecodingSymbols()); D14.uncompress(in, outBuffer, getD(), getDecodingSymbols()); return outBuffer; } + private void readStatsOrder0(final ByteBuffer cp, final ArithmeticDecoder decoder, final RANSDecodingSymbol[] decodingSymbols) { + // Pre-compute reverse lookup of frequency. + int rle = 0; + int x = 0; + int j = cp.get() & 0xFF; + do { + if ((decoder.fc[j].F = (cp.get() & 0xFF)) >= 128) { + decoder.fc[j].F &= ~128; + decoder.fc[j].F = ((decoder.fc[j].F & 127) << 8) | (cp.get() & 0xFF); + } + decoder.fc[j].C = x; + + decodingSymbols[j].set(decoder.fc[j].C, decoder.fc[j].F); + + /* Build reverse lookup table */ + Arrays.fill(decoder.R, x, x + decoder.fc[j].F, (byte) j); + + x += decoder.fc[j].F; + + if (rle == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { + j = cp.get() & 0xFF; + rle = cp.get() & 0xFF; + } else if (rle != 0) { + rle--; + j++; + } else { + j = cp.get() & 0xFF; + } + } while (j != 0); + + assert (x < Constants.TOTFREQ); + } + + private void readStatsOrder1(final ByteBuffer cp, final ArithmeticDecoder[] D, final RANSDecodingSymbol[][] decodingSymbols) { + int rle_i = 0; + int i = 0xFF & cp.get(); + do { + int rle_j = 0; + int x = 0; + int j = 0xFF & cp.get(); + do { + if ((D[i].fc[j].F = (0xFF & cp.get())) >= 128) { + D[i].fc[j].F &= ~128; + D[i].fc[j].F = ((D[i].fc[j].F & 127) << 8) | (0xFF & cp.get()); + } + D[i].fc[j].C = x; + + if (D[i].fc[j].F == 0) { + D[i].fc[j].F = Constants.TOTFREQ; + } + + decodingSymbols[i][j].set( + D[i].fc[j].C, + D[i].fc[j].F + ); + + /* Build reverse lookup table */ + Arrays.fill(D[i].R, x, x + D[i].fc[j].F, (byte) j); + + x += D[i].fc[j].F; + assert (x <= Constants.TOTFREQ); + + if (rle_j == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { + j = (0xFF & cp.get()); + rle_j = (0xFF & cp.get()); + } else if (rle_j != 0) { + rle_j--; + j++; + } else { + j = (0xFF & cp.get()); + } + } while (j != 0); + + if (rle_i == 0 && i + 1 == (0xFF & cp.get(cp.position()))) { + i = (0xFF & cp.get()); + rle_i = (0xFF & cp.get()); + } else if (rle_i != 0) { + rle_i--; + i++; + } else { + i = (0xFF & cp.get()); + } + } while (i != 0); + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java index 3788026d8b..e842342fcc 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java @@ -47,15 +47,15 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { outBuffer.position(PREFIX_BYTE_LENGTH); // start of frequency table // get the normalised frequencies of the alphabets - final int[] F = Frequencies4x8.calcFrequenciesOrder0(inBuffer); + final int[] F = calcFrequenciesOrder0(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - Frequencies4x8.buildSymsOrder0(F, getEncodingSymbols()[0]); + buildSymsOrder0(F, getEncodingSymbols()[0]); final ByteBuffer cp = outBuffer.slice(); // write Frequency table - final int frequencyTableSize = Frequencies4x8.writeFrequenciesOrder0(cp, F); + final int frequencyTableSize = writeFrequenciesOrder0(cp, F); inBuffer.rewind(); final int compressedBlobSize = E04.compress(inBuffer, getEncodingSymbols()[0], cp); @@ -73,13 +73,13 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { outBuffer.position(PREFIX_BYTE_LENGTH); // get normalized frequencies - final int[][] F = Frequencies4x8.calcFrequenciesOrder1(inBuffer); + final int[][] F = calcFrequenciesOrder1(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - Frequencies4x8.buildSymsOrder1(F, getEncodingSymbols()); + buildSymsOrder1(F, getEncodingSymbols()); final ByteBuffer cp = outBuffer.slice(); - final int frequencyTableSize = Frequencies4x8.writeFrequenciesOrder1(cp, F); + final int frequencyTableSize = writeFrequenciesOrder1(cp, F); inBuffer.rewind(); final int compressedBlobSize = E14.compress(inBuffer, getEncodingSymbols(), cp); @@ -109,4 +109,248 @@ private static void writeCompressionPrefix( outBuffer.rewind(); } + private static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { + final int inSize = inBuffer.remaining(); + + // Compute statistics + // T = total of true counts + // F = scaled integer frequencies + // M = sum(fs) + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + int T = 0; //// T is the total number of symbols in the input + for (int i = 0; i < inSize; i++) { + F[0xFF & inBuffer.get()]++; + T++; + } + final long tr = ((long) Constants.TOTFREQ << 31) / T + (1 << 30) / T; + + // Normalise so T[i] == TOTFREQ + // m is the maximum frequency value + // M is the symbol that has the maximum frequency + int m = 0; + int M = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (m < F[j]) { + m = F[j]; + M = j; + } + } + + int fsum = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[j] == 0) { + continue; + } + // using tr to normalize symbol frequencies such that their total = (1<<12) = 4096 + if ((F[j] = (int) ((F[j] * tr) >> 31)) == 0) { + // make sure that a non-zero symbol frequency is not incorrectly set to 0. + // Change it to 1 if the calculated value is 0. + F[j] = 1; + } + fsum += F[j]; + } + + fsum++; + // adjust the frequency of the symbol with maximum frequency to make sure that + // the sum of frequencies of all the symbols = 4096 + if (fsum < Constants.TOTFREQ) { + F[M] += Constants.TOTFREQ - fsum; + } else { + F[M] -= fsum - Constants.TOTFREQ; + } + assert (F[M] > 0); + return F; + } + + private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { + final int in_size = in.remaining(); + + final int[][] F = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + final int[] T = new int[Constants.NUMBER_OF_SYMBOLS]; + int c; + + int last_i = 0; + for (int i = 0; i < in_size; i++) { + F[last_i][c = (0xFF & in.get())]++; + T[last_i]++; + last_i = c; + } + F[0][0xFF & in.get((in_size >> 2))]++; + F[0][0xFF & in.get(2 * (in_size >> 2))]++; + F[0][0xFF & in.get(3 * (in_size >> 2))]++; + T[0] += 3; + + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (T[i] == 0) { + continue; + } + + final double p = ((double) Constants.TOTFREQ) / T[i]; + int t2 = 0, m = 0, M = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[i][j] == 0) + continue; + + if (m < F[i][j]) { + m = F[i][j]; + M = j; + } + + if ((F[i][j] *= p) == 0) + F[i][j] = 1; + t2 += F[i][j]; + } + + t2++; + if (t2 < Constants.TOTFREQ) { + F[i][M] += Constants.TOTFREQ - t2; + } else { + F[i][M] -= t2 - Constants.TOTFREQ; + } + } + + return F; + } + + private static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { + final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; + + // T = running sum of frequencies including the current symbol + // F[j] = frequency of symbol "j" + // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") + int T = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + C[j] = T; + T += F[j]; + if (F[j] != 0) { + //For each symbol, set start = cumulative frequency and freq = frequency + syms[j].set(C[j], F[j], Constants.TF_SHIFT); + } + } + return syms; + } + + private static RANSEncodingSymbol[][] buildSymsOrder1(final int[][] F, final RANSEncodingSymbol[][] syms) { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + final int[] F_i_ = F[i]; + int x = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F_i_[j] != 0) { + syms[i][j].set(x, F_i_[j], Constants.TF_SHIFT); + x += F_i_[j]; + } + } + } + + return syms; + } + + private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { + final int start = cp.position(); + + int rle = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[j] != 0) { + // j + if (rle != 0) { + rle--; + } else { + // write the symbol if it is the first symbol or if rle = 0. + // if rle != 0, then skip writing the symbol. + cp.put((byte) j); + // We've encoded two symbol frequencies in a row. + // How many more are there? Store that count so + // we can avoid writing consecutive symbols. + // Note: maximum possible rle = 254 + // rle requires atmost 1 byte + if (rle == 0 && j != 0 && F[j - 1] != 0) { + for (rle = j + 1; rle < 256 && F[rle] != 0; rle++) + ; + rle -= j + 1; + cp.put((byte) rle); + } + } + + // F[j] + if (F[j] < 128) { + cp.put((byte) (F[j])); + } else { + // if F[j] >127, it is written in 2 bytes + cp.put((byte) (128 | (F[j] >> 8))); + cp.put((byte) (F[j] & 0xff)); + } + } + } + + // write 0 indicating the end of frequency table + cp.put((byte) 0); + return cp.position() - start; + } + + private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) { + final int start = cp.position(); + final int[] T = new int[Constants.NUMBER_OF_SYMBOLS]; + + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + T[i] += F[i][j]; + } + } + + int rle_i = 0; + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (T[i] == 0) { + continue; + } + + // Store frequency table + // i + if (rle_i != 0) { + rle_i--; + } else { + cp.put((byte) i); + // FIXME: could use order-0 statistics to observe which alphabet + // symbols are present and base RLE on that ordering instead. + if (i != 0 && T[i - 1] != 0) { + for (rle_i = i + 1; rle_i < 256 && T[rle_i] != 0; rle_i++) + ; + rle_i -= i + 1; + cp.put((byte) rle_i); + } + } + + final int[] F_i_ = F[i]; + int rle_j = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F_i_[j] != 0) { + + // j + if (rle_j != 0) { + rle_j--; + } else { + cp.put((byte) j); + if (rle_j == 0 && j != 0 && F_i_[j - 1] != 0) { + for (rle_j = j + 1; rle_j < 256 && F_i_[rle_j] != 0; rle_j++) + ; + rle_j -= j + 1; + cp.put((byte) rle_j); + } + } + + // F_i_[j] + if (F_i_[j] < 128) { + cp.put((byte) F_i_[j]); + } else { + cp.put((byte) (128 | (F_i_[j] >> 8))); + cp.put((byte) (F_i_[j] & 0xff)); + } + } + } + cp.put((byte) 0); + } + cp.put((byte) 0); + + return cp.position() - start; + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java index 82dda96c20..0f27193346 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java @@ -2,6 +2,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Arrays; public class RANSNx16Decode extends RANSDecode{ private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); @@ -60,10 +61,69 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params pa private ByteBuffer uncompressOrder0WayN(final ByteBuffer inBuffer, final ByteBuffer outBuffer,final int n_out,final int Nway) { // read the frequency table, get the normalised frequencies and use it to set the RANSDecodingSymbols - FrequenciesNx16.readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); + readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); // uncompress using Nway rans states D0N.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer,n_out,Nway); return outBuffer; } + private static void readStatsOrder0( + final ByteBuffer cp, + ArithmeticDecoder decoder, + RANSDecodingSymbol[] decodingSymbols) { + // Use the Frequency table to set the values of F, C and R + final int[] A = readAlphabet(cp); + int x = 0; + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + + // read F, normalise F then calculate C and R + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (A[j] > 0) { + if ((F[j] = (cp.get() & 0xFF)) >= 128){ + F[j] &= ~128; + F[j] = (( F[j] &0x7f) << 7) | (cp.get() & 0x7F); + } + } + } + FrequencyUtils.normaliseFrequenciesOrder0(F,12); + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if(A[j]>0){ + + // decoder.fc[j].F -> Frequency + // decoder.fc[j].C -> Cumulative Frequency preceding the current symbol + decoder.fc[j].F = F[j]; + decoder.fc[j].C = x; + decodingSymbols[j].set(decoder.fc[j].C, decoder.fc[j].F); + + // R -> Reverse Lookup table + Arrays.fill(decoder.R, x, x + decoder.fc[j].F, (byte) j); + x += decoder.fc[j].F; + } + } + } + + private static int[] readAlphabet(final ByteBuffer cp){ + // gets the list of alphabets whose frequency!=0 + final int[] A = new int[Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + A[i]=0; + } + int rle = 0; + int sym = cp.get() & 0xFF; + int last_sym = sym; + do { + A[sym] = 1; + if (rle!=0) { + rle--; + sym++; + } else { + sym = cp.get() & 0xFF; + if (sym == last_sym+1) + rle = cp.get() & 0xFF; + } + last_sym = sym; + } while (sym != 0); + return A; + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java index 8c927f3c04..b451310b67 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java @@ -57,7 +57,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params param private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, final ByteBuffer outBuffer) { final int inSize = inBuffer.remaining(); - final int[] F = FrequenciesNx16.buildFrequenciesOrder0(inBuffer); + final int[] F = buildFrequenciesOrder0(inBuffer); final ByteBuffer cp = outBuffer.slice(); int bitSize = (int) Math.ceil(Math.log(inSize) / Math.log(2)); if (bitSize == 0) { @@ -71,16 +71,16 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, final int prefix_size = outBuffer.position(); // Normalize Frequencies such that sum of Frequencies = 1 << bitsize - FrequenciesNx16.normaliseFrequenciesOrder0(F, bitSize); + FrequencyUtils.normaliseFrequenciesOrder0(F, bitSize); // Write the Frequency table. Keep track of the size for later - final int frequencyTableSize = FrequenciesNx16.writeFrequenciesOrder0(cp, F); + final int frequencyTableSize = writeFrequenciesOrder0(cp, F); // Normalize Frequencies such that sum of Frequencies = 1 << 12 - FrequenciesNx16.normaliseFrequenciesOrder0(F, 12); + FrequencyUtils.normaliseFrequenciesOrder0(F, 12); // update the RANS Encoding Symbols - FrequenciesNx16.buildSymsOrder0(F, getEncodingSymbols()[0]); + buildSymsOrder0(F, getEncodingSymbols()[0]); inBuffer.rewind(); final int compressedBlobSize = E0N.compress(inBuffer, getEncodingSymbols()[0], cp, Nway); outBuffer.rewind(); // set position to 0 @@ -88,4 +88,90 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, return outBuffer; } + private static int[] buildFrequenciesOrder0(final ByteBuffer inBuffer) { + // Returns an array of raw symbol frequencies + final int inSize = inBuffer.remaining(); + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < inSize; i++) { + F[0xFF & inBuffer.get()]++; + } + return F; + } + + private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { + // Order 0 frequencies store the complete alphabet of observed + // symbols using run length encoding, followed by a table of frequencies + // for each symbol in the alphabet. + final int start = cp.position(); + + // write the alphabet first and then their frequencies + writeAlphabet(cp,F); + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[j] != 0) { + if (F[j] < 128) { + cp.put((byte) (F[j] & 0x7f)); + } else { + + // if F[j] >127, it is written in 2 bytes + // right shift by 7 and get the most Significant Bits. + // Set the Most Significant Bit of the first byte to 1 indicating that the frequency comprises of 2 bytes + cp.put((byte) (128 | (F[j] >> 7))); + cp.put((byte) (F[j] & 0x7f)); //Least Significant 7 Bits + } + } + } + return cp.position() - start; + } + + private static void writeAlphabet(final ByteBuffer cp, final int[] F) { + // Uses Run Length Encoding to write all the symbols whose frequency!=0 + int rle = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[j] != 0) { + if (rle != 0) { + rle--; + } else { + + // write the symbol if it is the first symbol or if rle = 0. + // if rle != 0, then skip writing the symbol. + cp.put((byte) j); + + // We've encoded two symbol frequencies in a row. + // How many more are there? Store that count so + // we can avoid writing consecutive symbols. + // Note: maximum possible rle = 254 + // rle requires atmost 1 byte + if (rle == 0 && j != 0 && F[j - 1] != 0) { + for (rle = j + 1; rle < Constants.NUMBER_OF_SYMBOLS && F[rle] != 0; rle++); + rle -= j + 1; + cp.put((byte) rle); + } + } + } + } + + // write 0 indicating the end of alphabet + cp.put((byte) 0); + } + + private static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { + // updates the RANSEncodingSymbol array for all the symbols + final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; + + // T = running sum of frequencies including the current symbol + // F[j] = frequency of symbol "j" + // C[j] = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j") + int T = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + C[j] = T; + T += F[j]; + if (F[j] != 0) { + + //For each symbol, set start = cumulative frequency and freq = frequency + syms[j].set(C[j], F[j], Constants.TF_SHIFT); + } + } + return syms; + } + } \ No newline at end of file From f3734ca310e8fcd33c1b3e1518ed880df3ce01b8 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 7 Mar 2022 13:20:15 -0500 Subject: [PATCH 04/76] clean up rans tests and add separate packages for rans 4x8 and nx16 --- .../cram/compression/ExternalCompressor.java | 4 +- .../compression/RANSExternalCompressor.java | 6 +- .../compression/rans/ArithmeticDecoder.java | 6 +- .../cram/compression/rans/Constants.java | 12 +- .../samtools/cram/compression/rans/FC.java | 5 +- .../cram/compression/rans/Frequencies4x8.java | 339 ------------------ .../compression/rans/FrequenciesNx16.java | 205 ----------- .../cram/compression/rans/FrequencyUtils.java | 55 --- .../cram/compression/rans/RANS4x8Params.java | 16 - .../cram/compression/rans/RANSDecode.java | 6 +- .../compression/rans/RANSDecodingSymbol.java | 2 +- .../cram/compression/rans/RANSEncode.java | 4 +- .../compression/rans/RANSEncodingSymbol.java | 2 +- .../samtools/cram/compression/rans/Utils.java | 63 +++- .../compression/rans/{ => rans4x8}/D04.java | 10 +- .../compression/rans/{ => rans4x8}/D14.java | 7 +- .../compression/rans/{ => rans4x8}/E04.java | 6 +- .../compression/rans/{ => rans4x8}/E14.java | 6 +- .../rans/{ => rans4x8}/RANS4x8Decode.java | 21 +- .../rans/{ => rans4x8}/RANS4x8Encode.java | 16 +- .../rans/rans4x8/RANS4x8Params.java | 23 ++ .../compression/rans/{ => ransnx16}/D0N.java | 7 +- .../compression/rans/{ => ransnx16}/E0N.java | 6 +- .../rans/{ => ransnx16}/RANSNx16Decode.java | 21 +- .../rans/{ => ransnx16}/RANSNx16Encode.java | 19 +- .../rans/{ => ransnx16}/RANSNx16Params.java | 25 +- .../CompressionHeaderEncodingMap.java | 2 +- .../cram/structure/CompressorCache.java | 4 +- .../samtools/cram/CRAMCodecCorpusTest.java | 163 ++++----- .../cram/compression/rans/RansTest.java | 145 +++----- 30 files changed, 336 insertions(+), 870 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies4x8.java delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => rans4x8}/D04.java (87%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => rans4x8}/D14.java (88%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => rans4x8}/E04.java (88%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => rans4x8}/E14.java (92%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => rans4x8}/RANS4x8Decode.java (85%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => rans4x8}/RANS4x8Encode.java (95%) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => ransnx16}/D0N.java (84%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => ransnx16}/E0N.java (88%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => ransnx16}/RANSNx16Decode.java (87%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => ransnx16}/RANSNx16Encode.java (91%) rename src/main/java/htsjdk/samtools/cram/compression/rans/{ => ransnx16}/RANSNx16Params.java (80%) diff --git a/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java index 5bf27afbea..aabd3bc4cd 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java @@ -1,7 +1,7 @@ package htsjdk.samtools.cram.compression; -import htsjdk.samtools.cram.compression.rans.RANS4x8Decode; -import htsjdk.samtools.cram.compression.rans.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import htsjdk.utils.ValidationUtils; diff --git a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java index e89df5353d..0898e2de21 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java @@ -24,10 +24,10 @@ */ package htsjdk.samtools.cram.compression; -import htsjdk.samtools.cram.compression.rans.RANS4x8Decode; -import htsjdk.samtools.cram.compression.rans.RANS4x8Encode; -import htsjdk.samtools.cram.compression.rans.RANS4x8Params; import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import java.nio.ByteBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java index f89a8e5e30..7f6249bebe 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java @@ -24,11 +24,11 @@ */ package htsjdk.samtools.cram.compression.rans; -final class ArithmeticDecoder { - final FC[] fc = new FC[256]; +final public class ArithmeticDecoder { + public final FC[] fc = new FC[256]; // reverse lookup table - byte[] R = new byte[Constants.TOTFREQ]; + public byte[] R = new byte[Constants.TOTFREQ]; public ArithmeticDecoder() { for (int i = 0; i < 256; i++) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java index 9e5ad9fc5b..3cb6439481 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java @@ -1,9 +1,9 @@ package htsjdk.samtools.cram.compression.rans; -final class Constants { - static final int TF_SHIFT = 12; - static final int TOTFREQ = (1 << TF_SHIFT); // 4096 - static final int RANS_BYTE_L_4x8 = 1 << 23; - static final int RANS_BYTE_L_Nx16 = 1 << 15; - static final int NUMBER_OF_SYMBOLS = 256; +final public class Constants { + public static final int TF_SHIFT = 12; + public static final int TOTFREQ = (1 << TF_SHIFT); // 4096 + public static final int RANS_BYTE_L_4x8 = 1 << 23; + public static final int RANS_BYTE_L_Nx16 = 1 << 15; + public static final int NUMBER_OF_SYMBOLS = 256; } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/FC.java b/src/main/java/htsjdk/samtools/cram/compression/rans/FC.java index dc08e5f132..d9cb3f66ad 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/FC.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/FC.java @@ -24,8 +24,9 @@ */ package htsjdk.samtools.cram.compression.rans; -final class FC { - int F, C; +public final class FC { + public int F; + public int C; public void reset() { F = C = 0; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies4x8.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies4x8.java deleted file mode 100644 index e401039a7a..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Frequencies4x8.java +++ /dev/null @@ -1,339 +0,0 @@ -package htsjdk.samtools.cram.compression.rans; - -import java.nio.ByteBuffer; -import java.util.Arrays; - -// T = total of true counts -// F = scaled integer frequencies -// M = sum(fs) - -final class Frequencies4x8 { - - static void readStatsOrder0(final ByteBuffer cp, final ArithmeticDecoder decoder, final RANSDecodingSymbol[] decodingSymbols) { - // Pre-compute reverse lookup of frequency. - int rle = 0; - int x = 0; - int j = cp.get() & 0xFF; - do { - if ((decoder.fc[j].F = (cp.get() & 0xFF)) >= 128) { - decoder.fc[j].F &= ~128; - decoder.fc[j].F = ((decoder.fc[j].F & 127) << 8) | (cp.get() & 0xFF); - } - decoder.fc[j].C = x; - - decodingSymbols[j].set(decoder.fc[j].C, decoder.fc[j].F); - - /* Build reverse lookup table */ - Arrays.fill(decoder.R, x, x + decoder.fc[j].F, (byte) j); - - x += decoder.fc[j].F; - - if (rle == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { - j = cp.get() & 0xFF; - rle = cp.get() & 0xFF; - } else if (rle != 0) { - rle--; - j++; - } else { - j = cp.get() & 0xFF; - } - } while (j != 0); - - assert (x < Constants.TOTFREQ); - } - - static void readStatsOrder1(final ByteBuffer cp, final ArithmeticDecoder[] D, final RANSDecodingSymbol[][] decodingSymbols) { - int rle_i = 0; - int i = 0xFF & cp.get(); - do { - int rle_j = 0; - int x = 0; - int j = 0xFF & cp.get(); - do { - if ((D[i].fc[j].F = (0xFF & cp.get())) >= 128) { - D[i].fc[j].F &= ~128; - D[i].fc[j].F = ((D[i].fc[j].F & 127) << 8) | (0xFF & cp.get()); - } - D[i].fc[j].C = x; - - if (D[i].fc[j].F == 0) { - D[i].fc[j].F = Constants.TOTFREQ; - } - - decodingSymbols[i][j].set( - D[i].fc[j].C, - D[i].fc[j].F - ); - - /* Build reverse lookup table */ - Arrays.fill(D[i].R, x, x + D[i].fc[j].F, (byte) j); - - x += D[i].fc[j].F; - assert (x <= Constants.TOTFREQ); - - if (rle_j == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { - j = (0xFF & cp.get()); - rle_j = (0xFF & cp.get()); - } else if (rle_j != 0) { - rle_j--; - j++; - } else { - j = (0xFF & cp.get()); - } - } while (j != 0); - - if (rle_i == 0 && i + 1 == (0xFF & cp.get(cp.position()))) { - i = (0xFF & cp.get()); - rle_i = (0xFF & cp.get()); - } else if (rle_i != 0) { - rle_i--; - i++; - } else { - i = (0xFF & cp.get()); - } - } while (i != 0); - } - - static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { - final int inSize = inBuffer.remaining(); - - // Compute statistics - final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; - int T = 0; //// T is the total number of symbols in the input - for (int i = 0; i < inSize; i++) { - F[0xFF & inBuffer.get()]++; - T++; - } - final long tr = ((long) Constants.TOTFREQ << 31) / T + (1 << 30) / T; - - // Normalise so T[i] == TOTFREQ - // m is the maximum frequency value - // M is the symbol that has the maximum frequency - int m = 0; - int M = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (m < F[j]) { - m = F[j]; - M = j; - } - } - - int fsum = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] == 0) { - continue; - } - // using tr to normalize symbol frequencies such that their total = (1<<12) = 4096 - if ((F[j] = (int) ((F[j] * tr) >> 31)) == 0) { - // make sure that a non-zero symbol frequency is not incorrectly set to 0. - // Change it to 1 if the calculated value is 0. - F[j] = 1; - } - fsum += F[j]; - } - - fsum++; - // adjust the frequency of the symbol with maximum frequency to make sure that - // the sum of frequencies of all the symbols = 4096 - if (fsum < Constants.TOTFREQ) { - F[M] += Constants.TOTFREQ - fsum; - } else { - F[M] -= fsum - Constants.TOTFREQ; - } - - assert (F[M] > 0); - return F; - } - - static int[][] calcFrequenciesOrder1(final ByteBuffer in) { - final int in_size = in.remaining(); - - final int[][] F = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; - final int[] T = new int[Constants.NUMBER_OF_SYMBOLS]; - int c; - - int last_i = 0; - for (int i = 0; i < in_size; i++) { - F[last_i][c = (0xFF & in.get())]++; - T[last_i]++; - last_i = c; - } - F[0][0xFF & in.get((in_size >> 2))]++; - F[0][0xFF & in.get(2 * (in_size >> 2))]++; - F[0][0xFF & in.get(3 * (in_size >> 2))]++; - T[0] += 3; - - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - if (T[i] == 0) { - continue; - } - - final double p = ((double) Constants.TOTFREQ) / T[i]; - int t2 = 0, m = 0, M = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[i][j] == 0) - continue; - - if (m < F[i][j]) { - m = F[i][j]; - M = j; - } - - if ((F[i][j] *= p) == 0) - F[i][j] = 1; - t2 += F[i][j]; - } - - t2++; - if (t2 < Constants.TOTFREQ) { - F[i][M] += Constants.TOTFREQ - t2; - } else { - F[i][M] -= t2 - Constants.TOTFREQ; - } - } - - return F; - } - - static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { - final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; - - // T = running sum of frequencies including the current symbol - // F[j] = frequency of symbol "j" - // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") - int T = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - C[j] = T; - T += F[j]; - if (F[j] != 0) { - //For each symbol, set start = cumulative frequency and freq = frequency - syms[j].set(C[j], F[j], Constants.TF_SHIFT); - } - } - return syms; - } - - static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { - final int start = cp.position(); - - int rle = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] != 0) { - // j - if (rle != 0) { - rle--; - } else { - // write the symbol if it is the first symbol or if rle = 0. - // if rle != 0, then skip writing the symbol. - cp.put((byte) j); - // We've encoded two symbol frequencies in a row. - // How many more are there? Store that count so - // we can avoid writing consecutive symbols. - // Note: maximum possible rle = 254 - // rle requires atmost 1 byte - if (rle == 0 && j != 0 && F[j - 1] != 0) { - for (rle = j + 1; rle < 256 && F[rle] != 0; rle++) - ; - rle -= j + 1; - cp.put((byte) rle); - } - } - - // F[j] - if (F[j] < 128) { - cp.put((byte) (F[j])); - } else { - // if F[j] >127, it is written in 2 bytes - cp.put((byte) (128 | (F[j] >> 8))); - cp.put((byte) (F[j] & 0xff)); - } - } - } - - // write 0 indicating the end of frequency table - cp.put((byte) 0); - return cp.position() - start; - } - - static RANSEncodingSymbol[][] buildSymsOrder1(final int[][] F, final RANSEncodingSymbol[][] syms) { - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - final int[] F_i_ = F[i]; - int x = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F_i_[j] != 0) { - syms[i][j].set(x, F_i_[j], Constants.TF_SHIFT); - x += F_i_[j]; - } - } - } - - return syms; - } - - static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) { - final int start = cp.position(); - final int[] T = new int[Constants.NUMBER_OF_SYMBOLS]; - - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - T[i] += F[i][j]; - } - } - - int rle_i = 0; - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - if (T[i] == 0) { - continue; - } - - // Store frequency table - // i - if (rle_i != 0) { - rle_i--; - } else { - cp.put((byte) i); - // FIXME: could use order-0 statistics to observe which alphabet - // symbols are present and base RLE on that ordering instead. - if (i != 0 && T[i - 1] != 0) { - for (rle_i = i + 1; rle_i < 256 && T[rle_i] != 0; rle_i++) - ; - rle_i -= i + 1; - cp.put((byte) rle_i); - } - } - - final int[] F_i_ = F[i]; - int rle_j = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F_i_[j] != 0) { - - // j - if (rle_j != 0) { - rle_j--; - } else { - cp.put((byte) j); - if (rle_j == 0 && j != 0 && F_i_[j - 1] != 0) { - for (rle_j = j + 1; rle_j < 256 && F_i_[rle_j] != 0; rle_j++) - ; - rle_j -= j + 1; - cp.put((byte) rle_j); - } - } - - // F_i_[j] - if (F_i_[j] < 128) { - cp.put((byte) F_i_[j]); - } else { - cp.put((byte) (128 | (F_i_[j] >> 8))); - cp.put((byte) (F_i_[j] & 0xff)); - } - } - } - cp.put((byte) 0); - } - cp.put((byte) 0); - - return cp.position() - start; - } - -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java b/src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java deleted file mode 100644 index d77c1a7898..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/FrequenciesNx16.java +++ /dev/null @@ -1,205 +0,0 @@ -package htsjdk.samtools.cram.compression.rans; - -import java.nio.ByteBuffer; -import java.util.Arrays; - -public class FrequenciesNx16 { - - static int[] readAlphabet(final ByteBuffer cp){ - // gets the list of alphabets whose frequency!=0 - final int[] A = new int[Constants.NUMBER_OF_SYMBOLS]; - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - A[i]=0; - } - int rle = 0; - int sym = cp.get() & 0xFF; - int last_sym = sym; - do { - A[sym] = 1; - if (rle!=0) { - rle--; - sym++; - } else { - sym = cp.get() & 0xFF; - if (sym == last_sym+1) - rle = cp.get() & 0xFF; - } - last_sym = sym; - } while (sym != 0); - return A; - } - - static void readStatsOrder0( - final ByteBuffer cp, - ArithmeticDecoder decoder, - RANSDecodingSymbol[] decodingSymbols) { - // Use the Frequency table to set the values of F, C and R - final int[] A = readAlphabet(cp); - int x = 0; - final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; - - // read F, normalise F then calculate C and R - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (A[j] > 0) { - if ((F[j] = (cp.get() & 0xFF)) >= 128){ - F[j] &= ~128; - F[j] = (( F[j] &0x7f) << 7) | (cp.get() & 0x7F); - } - } - } - normaliseFrequenciesOrder0(F,12); - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if(A[j]>0){ - - // decoder.fc[j].F -> Frequency - // decoder.fc[j].C -> Cumulative Frequency preceding the current symbol - decoder.fc[j].F = F[j]; - decoder.fc[j].C = x; - decodingSymbols[j].set(decoder.fc[j].C, decoder.fc[j].F); - - // R -> Reverse Lookup table - Arrays.fill(decoder.R, x, x + decoder.fc[j].F, (byte) j); - x += decoder.fc[j].F; - } - } - } - - static int[] buildFrequenciesOrder0(final ByteBuffer inBuffer) { - // Returns an array of raw symbol frequencies - final int inSize = inBuffer.remaining(); - final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; - for (int i = 0; i < inSize; i++) { - F[0xFF & inBuffer.get()]++; - } - return F; - } - - static int[] normaliseFrequenciesOrder0(final int[] F, final int bits) { - // Returns an array of normalised Frequencies, - // such that the frequencies add up to 1<> 31)) == 0) { - - // A non-zero symbol frequency should not be incorrectly set to 0. - // If the calculated value is 0, change it to 1 - F[j] = 1; - } - fsum += F[j]; - } - - // adjust the frequency of the symbol "M" such that - // the sum of frequencies of all the symbols = renormFreq - if (fsum < renormFreq) { - F[M] += renormFreq - fsum; - } else if (fsum > renormFreq){ - F[M] -= fsum - renormFreq; - } - return F; - } - - static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { - // updates the RANSEncodingSymbol array for all the symbols - final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; - - // T = running sum of frequencies including the current symbol - // F[j] = frequency of symbol "j" - // C[j] = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j") - int T = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - C[j] = T; - T += F[j]; - if (F[j] != 0) { - - //For each symbol, set start = cumulative frequency and freq = frequency - syms[j].set(C[j], F[j], Constants.TF_SHIFT); - } - } - return syms; - } - - static void writeAlphabet(final ByteBuffer cp, final int[] F) { - // Uses Run Length Encoding to write all the symbols whose frequency!=0 - int rle = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] != 0) { - if (rle != 0) { - rle--; - } else { - - // write the symbol if it is the first symbol or if rle = 0. - // if rle != 0, then skip writing the symbol. - cp.put((byte) j); - - // We've encoded two symbol frequencies in a row. - // How many more are there? Store that count so - // we can avoid writing consecutive symbols. - // Note: maximum possible rle = 254 - // rle requires atmost 1 byte - if (rle == 0 && j != 0 && F[j - 1] != 0) { - for (rle = j + 1; rle < Constants.NUMBER_OF_SYMBOLS && F[rle] != 0; rle++); - rle -= j + 1; - cp.put((byte) rle); - } - } - } - } - - // write 0 indicating the end of alphabet - cp.put((byte) 0); - } - - static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { - // Order 0 frequencies store the complete alphabet of observed - // symbols using run length encoding, followed by a table of frequencies - // for each symbol in the alphabet. - final int start = cp.position(); - - // write the alphabet first and then their frequencies - writeAlphabet(cp,F); - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] != 0) { - if (F[j] < 128) { - cp.put((byte) (F[j] & 0x7f)); - } else { - - // if F[j] >127, it is written in 2 bytes - // right shift by 7 and get the most Significant Bits. - // Set the Most Significant Bit of the first byte to 1 indicating that the frequency comprises of 2 bytes - cp.put((byte) (128 | (F[j] >> 7))); - cp.put((byte) (F[j] & 0x7f)); //Least Significant 7 Bits - } - } - } - return cp.position() - start; - } - -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java deleted file mode 100644 index 63c43ee669..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/FrequencyUtils.java +++ /dev/null @@ -1,55 +0,0 @@ -package htsjdk.samtools.cram.compression.rans; - -public class FrequencyUtils { - static int[] normaliseFrequenciesOrder0(final int[] F, final int bits) { - // Returns an array of normalised Frequencies, - // such that the frequencies add up to 1<> 31)) == 0) { - - // A non-zero symbol frequency should not be incorrectly set to 0. - // If the calculated value is 0, change it to 1 - F[j] = 1; - } - fsum += F[j]; - } - - // adjust the frequency of the symbol "M" such that - // the sum of frequencies of all the symbols = renormFreq - if (fsum < renormFreq) { - F[M] += renormFreq - fsum; - } else if (fsum > renormFreq){ - F[M] -= fsum - renormFreq; - } - return F; - } -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java deleted file mode 100644 index 0714c08107..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Params.java +++ /dev/null @@ -1,16 +0,0 @@ -package htsjdk.samtools.cram.compression.rans; - -public class RANS4x8Params implements RANSParams{ - - private ORDER order; - - public RANS4x8Params(ORDER order) { - this.order = order; - } - - @Override - public ORDER getOrder() { - return order; - } - -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java index cf2f5303b1..e38b6745f1 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java @@ -7,15 +7,15 @@ public abstract class RANSDecode { private RANSDecodingSymbol[][] decodingSymbols; // GETTERS - public ArithmeticDecoder[] getD() { + protected ArithmeticDecoder[] getD() { return D; } - public RANSDecodingSymbol[][] getDecodingSymbols() { + protected RANSDecodingSymbol[][] getDecodingSymbols() { return decodingSymbols; } - abstract ByteBuffer uncompress(final ByteBuffer inBuffer, final T params); + public abstract ByteBuffer uncompress(final ByteBuffer inBuffer, final T params); // Lazy initialization of working memory for the decoder protected void initializeRANSDecoder() { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java index 49a18f2904..44ce0ad050 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java @@ -26,7 +26,7 @@ import java.nio.ByteBuffer; -final class RANSDecodingSymbol { +final public class RANSDecodingSymbol { int start; // Start of range. int freq; // Symbol frequency. diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index 244b1082ce..8d10dff149 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -7,11 +7,11 @@ public abstract class RANSEncode { private RANSEncodingSymbol[][] encodingSymbols; // Getter - public RANSEncodingSymbol[][] getEncodingSymbols() { + protected RANSEncodingSymbol[][] getEncodingSymbols() { return encodingSymbols; } - abstract ByteBuffer compress(final ByteBuffer inBuffer, final T params); + public abstract ByteBuffer compress(final ByteBuffer inBuffer, final T params); // Lazy initialization of working memory for the encoder protected void initializeRANSEncoder() { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java index e1e9b0b549..a34dd4855d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java @@ -28,7 +28,7 @@ import java.nio.ByteBuffer; -final class RANSEncodingSymbol { +public final class RANSEncodingSymbol { private int xMax; // (Exclusive) upper bound of pre-normalization interval private int rcpFreq; // Fixed-point reciprocal frequency private int bias; // Bias diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index f8066ad6d0..e4ce815a25 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -2,7 +2,7 @@ import java.nio.ByteBuffer; -final class Utils { +final public class Utils { private static void reverse(final byte[] array, final int offset, final int size) { if (array == null) { @@ -20,7 +20,7 @@ private static void reverse(final byte[] array, final int offset, final int size } } - static void reverse(final ByteBuffer byteBuffer) { + public static void reverse(final ByteBuffer byteBuffer) { byte tmp; if (byteBuffer.hasArray()) { reverse(byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit()); @@ -34,12 +34,12 @@ static void reverse(final ByteBuffer byteBuffer) { } // Returns the current cumulative frequency (map it to a symbol yourself!) - static int RANSDecodeGet(final int r, final int scaleBits) { + public static int RANSDecodeGet(final int r, final int scaleBits) { return r & ((1 << scaleBits) - 1); } // Re-normalize. - static int RANSDecodeRenormalize4x8(int r, final ByteBuffer byteBuffer) { + public static int RANSDecodeRenormalize4x8(int r, final ByteBuffer byteBuffer) { // re-normalize //rans4x8 @@ -51,7 +51,7 @@ static int RANSDecodeRenormalize4x8(int r, final ByteBuffer byteBuffer) { return r; } - static int RANSDecodeRenormalizeNx16(int r, final ByteBuffer byteBuffer) { + public static int RANSDecodeRenormalizeNx16(int r, final ByteBuffer byteBuffer) { // ransNx16 if (r < (Constants.RANS_BYTE_L_Nx16)) { int i = (0xFF & byteBuffer.get()); @@ -87,4 +87,57 @@ public static int readUint7(ByteBuffer cp){ } while((c & 0x80)!=0); return i; } + + public static int[] normaliseFrequenciesOrder0(final int[] F, final int bits) { + // Returns an array of normalised Frequencies, + // such that the frequencies add up to 1<> 31)) == 0) { + + // A non-zero symbol frequency should not be incorrectly set to 0. + // If the calculated value is 0, change it to 1 + F[j] = 1; + } + fsum += F[j]; + } + + // adjust the frequency of the symbol "M" such that + // the sum of frequencies of all the symbols = renormFreq + if (fsum < renormFreq) { + F[M] += renormFreq - fsum; + } else if (fsum > renormFreq){ + F[M] -= fsum - renormFreq; + } + return F; + } + } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/D04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java similarity index 87% rename from src/main/java/htsjdk/samtools/cram/compression/rans/D04.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java index b3838b5b66..1048664fcc 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/D04.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java @@ -1,8 +1,16 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.rans4x8; + +import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; final class D04 { + // TODO: move this method to RANS4x8Decode. Have the common code between D04 and D0N in utils class. + // same applies for other compress and uncompress methods. + static void uncompress( final ByteBuffer inBuffer, final ArithmeticDecoder D, diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/D14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java similarity index 88% rename from src/main/java/htsjdk/samtools/cram/compression/rans/D14.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java index 899905027c..37c4650597 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/D14.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java @@ -1,4 +1,9 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.rans4x8; + +import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; import java.nio.ByteOrder; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/E04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java similarity index 88% rename from src/main/java/htsjdk/samtools/cram/compression/rans/E04.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java index f94e648194..10612475f1 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/E04.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java @@ -1,4 +1,8 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.rans4x8; + +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/E14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java similarity index 92% rename from src/main/java/htsjdk/samtools/cram/compression/rans/E14.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java index 457e6fcd48..e0234c89b4 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/E14.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java @@ -1,4 +1,8 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.rans4x8; + +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; import java.nio.ByteOrder; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java similarity index 85% rename from src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 87d36bdc59..f43f6aeb00 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -1,4 +1,10 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.rans4x8; + +import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSDecode; +import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; +import htsjdk.samtools.cram.compression.rans.RANSParams; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -44,21 +50,22 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANS4x8Params para private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols - readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); + readStatsOrder0(inBuffer); D04.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer); - return outBuffer; } private ByteBuffer uncompressOrder1Way4(final ByteBuffer in, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols - readStatsOrder1(in, getD(), getDecodingSymbols()); + readStatsOrder1(in); D14.uncompress(in, outBuffer, getD(), getDecodingSymbols()); return outBuffer; } - private void readStatsOrder0(final ByteBuffer cp, final ArithmeticDecoder decoder, final RANSDecodingSymbol[] decodingSymbols) { + private void readStatsOrder0(final ByteBuffer cp) { // Pre-compute reverse lookup of frequency. + final ArithmeticDecoder decoder = getD()[0]; + final RANSDecodingSymbol[] decodingSymbols = getDecodingSymbols()[0]; int rle = 0; int x = 0; int j = cp.get() & 0xFF; @@ -90,7 +97,9 @@ private void readStatsOrder0(final ByteBuffer cp, final ArithmeticDecoder decode assert (x < Constants.TOTFREQ); } - private void readStatsOrder1(final ByteBuffer cp, final ArithmeticDecoder[] D, final RANSDecodingSymbol[][] decodingSymbols) { + private void readStatsOrder1(final ByteBuffer cp) { + final ArithmeticDecoder[] D = getD(); + final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols(); int rle_i = 0; int i = 0xFF & cp.get(); do { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java similarity index 95% rename from src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index e842342fcc..097c332522 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -1,5 +1,9 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.rans4x8; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSEncode; +import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; +import htsjdk.samtools.cram.compression.rans.RANSParams; import htsjdk.utils.ValidationUtils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -50,7 +54,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final int[] F = calcFrequenciesOrder0(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - buildSymsOrder0(F, getEncodingSymbols()[0]); + buildSymsOrder0(F); final ByteBuffer cp = outBuffer.slice(); @@ -76,7 +80,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final int[][] F = calcFrequenciesOrder1(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - buildSymsOrder1(F, getEncodingSymbols()); + buildSymsOrder1(F); final ByteBuffer cp = outBuffer.slice(); final int frequencyTableSize = writeFrequenciesOrder1(cp, F); @@ -212,7 +216,8 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { return F; } - private static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { + private RANSEncodingSymbol[] buildSymsOrder0(final int[] F) { + final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; // T = running sum of frequencies including the current symbol @@ -230,7 +235,8 @@ private static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEnc return syms; } - private static RANSEncodingSymbol[][] buildSymsOrder1(final int[][] F, final RANSEncodingSymbol[][] syms) { + private RANSEncodingSymbol[][] buildSymsOrder1(final int[][] F) { + final RANSEncodingSymbol[][] syms = getEncodingSymbols(); for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { final int[] F_i_ = F[i]; int x = 0; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java new file mode 100644 index 0000000000..223db704e2 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java @@ -0,0 +1,23 @@ +package htsjdk.samtools.cram.compression.rans.rans4x8; + +import htsjdk.samtools.cram.compression.rans.RANSParams; + +public class RANS4x8Params implements RANSParams { + + private ORDER order; + + public RANS4x8Params(ORDER order) { + this.order = order; + } + + @Override + public String toString() { + return "RANS4x8Params{" + "order=" + order + "}"; + } + + @Override + public ORDER getOrder() { + return order; + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/D0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java similarity index 84% rename from src/main/java/htsjdk/samtools/cram/compression/rans/D0N.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java index 6a9ba40b86..0a2b3f6fbe 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/D0N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java @@ -1,4 +1,9 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.ransnx16; + +import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/E0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java similarity index 88% rename from src/main/java/htsjdk/samtools/cram/compression/rans/E0N.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java index 64de2dea77..18f3c405b4 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/E0N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java @@ -1,4 +1,8 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.ransnx16; + +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java similarity index 87% rename from src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 0f27193346..fa4755613b 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -1,4 +1,11 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.ransnx16; + +import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSDecode; +import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; +import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -61,16 +68,16 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params pa private ByteBuffer uncompressOrder0WayN(final ByteBuffer inBuffer, final ByteBuffer outBuffer,final int n_out,final int Nway) { // read the frequency table, get the normalised frequencies and use it to set the RANSDecodingSymbols - readStatsOrder0(inBuffer, getD()[0], getDecodingSymbols()[0]); + readStatsOrder0(inBuffer); // uncompress using Nway rans states D0N.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer,n_out,Nway); return outBuffer; } - private static void readStatsOrder0( - final ByteBuffer cp, - ArithmeticDecoder decoder, - RANSDecodingSymbol[] decodingSymbols) { + private void readStatsOrder0( + final ByteBuffer cp) { + final ArithmeticDecoder decoder = getD()[0]; + final RANSDecodingSymbol[] decodingSymbols = getDecodingSymbols()[0]; // Use the Frequency table to set the values of F, C and R final int[] A = readAlphabet(cp); int x = 0; @@ -85,7 +92,7 @@ private static void readStatsOrder0( } } } - FrequencyUtils.normaliseFrequenciesOrder0(F,12); + Utils.normaliseFrequenciesOrder0(F,12); for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if(A[j]>0){ diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java similarity index 91% rename from src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index b451310b67..7ed2cbe6c0 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -1,8 +1,14 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.ransnx16; + +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSEncode; +import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; +import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; -public class RANSNx16Encode extends RANSEncode{ +public class RANSNx16Encode extends RANSEncode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); private static final int MINIMUM__ORDER_1_SIZE = 4; @@ -71,16 +77,16 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, final int prefix_size = outBuffer.position(); // Normalize Frequencies such that sum of Frequencies = 1 << bitsize - FrequencyUtils.normaliseFrequenciesOrder0(F, bitSize); + Utils.normaliseFrequenciesOrder0(F, bitSize); // Write the Frequency table. Keep track of the size for later final int frequencyTableSize = writeFrequenciesOrder0(cp, F); // Normalize Frequencies such that sum of Frequencies = 1 << 12 - FrequencyUtils.normaliseFrequenciesOrder0(F, 12); + Utils.normaliseFrequenciesOrder0(F, 12); // update the RANS Encoding Symbols - buildSymsOrder0(F, getEncodingSymbols()[0]); + buildSymsOrder0(F); inBuffer.rewind(); final int compressedBlobSize = E0N.compress(inBuffer, getEncodingSymbols()[0], cp, Nway); outBuffer.rewind(); // set position to 0 @@ -154,7 +160,8 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) { cp.put((byte) 0); } - private static RANSEncodingSymbol[] buildSymsOrder0(final int[] F, final RANSEncodingSymbol[] syms) { + private RANSEncodingSymbol[] buildSymsOrder0(final int[] F) { + final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; // updates the RANSEncodingSymbol array for all the symbols final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java similarity index 80% rename from src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Params.java rename to src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index 16d6cd42ef..f753e7e144 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -1,6 +1,8 @@ -package htsjdk.samtools.cram.compression.rans; +package htsjdk.samtools.cram.compression.rans.ransnx16; -public class RANSNx16Params implements RANSParams{ +import htsjdk.samtools.cram.compression.rans.RANSParams; + +public class RANSNx16Params implements RANSParams { // format is the first byte of the compressed data stream, // which consists of all the bit-flags detailing the type of transformations @@ -26,13 +28,18 @@ public RANSNx16Params(int formatFlags) { this.formatFlags = formatFlags; } + @Override + public String toString() { + return "RANSNx16Params{" + "formatFlags=" + formatFlags + "}"; + } + @Override public ORDER getOrder() { // Rans Order ZERO or ONE encoding return ORDER.fromInt(formatFlags & ORDER_FLAG_MASK); //convert into order type } - protected int getFormatFlags(){ + public int getFormatFlags(){ // Least significant 7 bits of the format return formatFlags & FORMAT_FLAG_MASK; } @@ -41,32 +48,32 @@ public void setFormatFlags(int formatFlags) { this.formatFlags = formatFlags; } - protected boolean getX32(){ + public boolean getX32(){ // Interleave N = 32 rANS states (else N = 4) return ((formatFlags & X32_FLAG_MASK)!=0); } - protected boolean getStripe(){ + public boolean getStripe(){ // multiway interleaving of byte streams return ((formatFlags & STRIPE_FLAG_MASK)!=0); } - protected boolean getNosz(){ + public boolean getNosz(){ // original size is not recorded (for use by Stripe) return ((formatFlags & NOSZ_FLAG_MASK)!=0); } - protected boolean getCAT(){ + public boolean getCAT(){ // Data is uncompressed return ((formatFlags & CAT_FLAG_MASK)!=0); } - protected boolean getRLE(){ + public boolean getRLE(){ // Run length encoding, with runs and literals encoded separately return ((formatFlags & RLE_FLAG_MASK)!=0); } - protected boolean getPack(){ + public boolean getPack(){ // Pack 2, 4, 8 or infinite symbols per byte return ((formatFlags & PACK_FLAG_MASK)!=0); } diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java index 0b3137a473..123c361132 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java @@ -26,7 +26,7 @@ import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.ExternalCompressor; -import htsjdk.samtools.cram.compression.rans.RANS4x8Params; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; import htsjdk.samtools.cram.encoding.CRAMEncoding; import htsjdk.samtools.cram.encoding.external.ByteArrayStopEncoding; import htsjdk.samtools.cram.encoding.external.ExternalByteEncoding; diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java index 9026daa004..4f1f781697 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java @@ -26,7 +26,9 @@ import htsjdk.samtools.cram.compression.ExternalCompressor; import htsjdk.samtools.cram.compression.RANSExternalCompressor; -import htsjdk.samtools.cram.compression.rans.*; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import htsjdk.samtools.util.Tuple; import htsjdk.utils.ValidationUtils; diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index b952bdcd55..3476c516bb 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -1,7 +1,15 @@ package htsjdk.samtools.cram; import htsjdk.HtsjdkTest; -import htsjdk.samtools.cram.compression.rans.*; +import htsjdk.samtools.cram.compression.rans.RANSDecode; +import htsjdk.samtools.cram.compression.rans.RANSEncode; +import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params; import org.apache.commons.compress.utils.IOUtils; import org.testng.Assert; import org.testng.SkipException; @@ -15,7 +23,9 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.stream.Stream; /** * HTSCodecs test data is kept in a separate repository, currently at https://github.com/jkbonfield/htscodecs-corpus @@ -38,153 +48,109 @@ public void testGetHTSCodecsCorpus() { //TODO: the TestDataProviders tests fail if the hts codecs corpus isn't available because - @DataProvider(name = "rans4x8") + // RANS4x8 codecs and testdata public Object[][] getRANS4x8TestData() throws IOException { // cache/reuse this for each test case to eliminate excessive garbage collection final RANS4x8Encode rans4x8Encode = new RANS4x8Encode(); final RANS4x8Decode rans4x8Decode = new RANS4x8Decode(); - final RANS4x8Params params0 = new RANS4x8Params(RANSParams.ORDER.ZERO); // RANS 4x8 order 0 - final RANS4x8Params params1 = new RANS4x8Params(RANSParams.ORDER.ONE); // RANS 4x8 order 1 final List testCases = new ArrayList<>(); getHtsCodecRANSTestFiles().stream() .forEach(p -> { - testCases.add(new Object[] {p, rans4x8Encode , rans4x8Decode, params0 }); - testCases.add(new Object[] {p, rans4x8Encode , rans4x8Decode, params1 }); + // RANS 4x8 order 0 + testCases.add(new Object[] { + p, + rans4x8Encode , + rans4x8Decode, + new RANS4x8Params(RANSParams.ORDER.ZERO), + "r4x8" // htscodecs directory where the RANS4x8 compressed files reside + }); + // RANS 4x8 order 1 + testCases.add(new Object[] { + p, + rans4x8Encode , + rans4x8Decode, + new RANS4x8Params(RANSParams.ORDER.ONE), + "r4x8" // htscodecs directory where the RANS4x8 compressed files reside + }); }); return testCases.toArray(new Object[][]{}); } - @DataProvider(name = "ransNx16") + // RANSNx16 codecs and testdata public Object[][] getRANS4x16TestData() throws IOException { final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); - final RANSNx16Params params0 = new RANSNx16Params(0); // RANS Nx16 order 0, none of the bit flags are set final List testCases = new ArrayList<>(); getHtsCodecRANSTestFiles().stream() .forEach(p -> { - testCases.add(new Object[] {p, ransNx16Encode, ransNx16Decode , params0}); + // RANS Nx16 order 0, none of the bit flags are set + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); }); return testCases.toArray(new Object[][]{}); } - @Test ( - dataProvider = "rans4x8", - dependsOnMethods = "testGetHTSCodecsCorpus", - description = "Roundtrip using htsjdk RANS4x8." + - " Compare the output with the original file" ) - public void testRANSRoundTrip4x8( - final Path inputTestDataPath, - final RANS4x8Encode ransEncode, - final RANS4x8Decode ransDecode, - final RANS4x8Params params) throws IOException { - if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { - throw new SkipException("htscodecs test data is not available locally"); - } - try (final InputStream is = Files.newInputStream(inputTestDataPath)) { - // preprocess the uncompressed data (to match what the htscodecs-library test harness does) - // by filtering out the embedded newlines, and then round trip through RANS and compare the - // results - - final ByteBuffer uncompressedBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(is))); - final ByteBuffer compressedBytes = ransEncode.compress(uncompressedBytes, params); - uncompressedBytes.rewind(); - System.out.println(String.format("%s RANS4x16 Order (%s) Uncompressed: (%,d) Compressed: (%,d)", - inputTestDataPath.getFileName(), - params.getOrder(), - uncompressedBytes.remaining(), - compressedBytes.remaining())); - Assert.assertEquals(ransDecode.uncompress(compressedBytes, params), uncompressedBytes); - } - } - - @Test ( - dataProvider = "rans4x8", - dependsOnMethods = "testGetHTSCodecsCorpus", - description = "Compress the original file using htsjdk RANS4x8 and compare it with the existing compressed file. " + - "Uncompress the existing compressed file using htsjdk RANS4x8 and compare it with the original file.") - public void testRANSPreCompressed4x8( - final Path inputTestDataPath, - final RANS4x8Encode ransEncode, - final RANS4x8Decode ransDecode, - final RANS4x8Params params ) throws IOException { - if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { - throw new SkipException("htscodecs test data is not available locally"); - } - - final Path preCompressedDataPath = getCompressedRANSPath("4x8",inputTestDataPath, params.getOrder().ordinal()); - try (final InputStream inputStream = Files.newInputStream(inputTestDataPath); - final InputStream preCompressedInputStream = Files.newInputStream(preCompressedDataPath); - ) { - // preprocess the uncompressed data (to match what the htscodecs-library test harness does) - // by filtering out the embedded newlines, and then round trip through RANS and compare the - // results - final ByteBuffer inputBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(inputStream))); - - final ByteBuffer preCompressedInputBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInputStream)); - - // Use htsjdk to compress the input file from htscodecs repo - final ByteBuffer htsjdkCompressedBytes = ransEncode.compress(inputBytes, params); - inputBytes.rewind(); - - // Compare the htsjdk compressed bytes with the precompressed file from htscodecs repo - Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); - - // Use htsjdk to uncompress the precompressed file from htscodecs repo - final ByteBuffer htsjdkUncompressedBytes = ransDecode.uncompress(preCompressedInputBytes, params); - - // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo - Assert.assertEquals(htsjdkUncompressedBytes, inputBytes); - } + @DataProvider(name = "allRansCodecsAndData") + public Object[][] getAllRansCodecs() throws IOException { + // concatenate RANS4x8 and RANSNx16 codecs and testdata + return Stream.concat(Arrays.stream(getRANS4x8TestData()), Arrays.stream(getRANS4x16TestData())) + .toArray(Object[][]::new); } @Test ( - dataProvider = "ransNx16", + dataProvider = "allRansCodecsAndData", dependsOnMethods = "testGetHTSCodecsCorpus", - description = "Roundtrip the original file using RANSNx16 htsjdk." + - " Compare the output with the original file" ) - public void testRANSRoundTripNx16( + description = "Roundtrip using htsjdk RANS. Compare the output with the original file" ) + public void testRANSRoundTrip( final Path inputTestDataPath, - final RANSNx16Encode ransEncode, - final RANSNx16Decode ransDecode, - final RANSNx16Params params) throws IOException { + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params, + final String unusedCompressedDirname) throws IOException { if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { throw new SkipException("htscodecs test data is not available locally"); } - try (final InputStream is = Files.newInputStream(inputTestDataPath)) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(is))); final ByteBuffer compressedBytes = ransEncode.compress(uncompressedBytes, params); uncompressedBytes.rewind(); - System.out.println(String.format("%s RANS4x16 Order (%s) Uncompressed: (%,d) Compressed: (%,d)", + System.out.println(String.format("filename:%s %s Uncompressed: (%,d) Compressed: (%,d)", inputTestDataPath.getFileName(), - params.getOrder(), + params.toString(), uncompressedBytes.remaining(), compressedBytes.remaining())); - Assert.assertEquals(ransDecode.uncompress(compressedBytes,params), uncompressedBytes); + Assert.assertEquals(ransDecode.uncompress(compressedBytes, params), uncompressedBytes); } } @Test ( - dataProvider = "ransNx16", + dataProvider = "allRansCodecsAndData", dependsOnMethods = "testGetHTSCodecsCorpus", - description = "Compress the original file using htsjdk RANSNx16 and compare it with the existing compressed file. " + - "Uncompress the existing compressed file using htsjdk RANSNx16 and compare it with the original file.") - public void testRANSPreCompressedNx16( + description = "Compress the original file using htsjdk RANS and compare it with the existing compressed file. " + + "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.") + public void testRANSPreCompressed( final Path inputTestDataPath, - final RANSNx16Encode ransEncode, - final RANSNx16Decode ransDecode, - final RANSNx16Params params) throws IOException { + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params, + final String CompressedDirname) throws IOException { if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { throw new SkipException("htscodecs test data is not available locally"); } - final Path preCompressedDataPath = getCompressedRANSPath("4x16",inputTestDataPath, params.getOrder().ordinal()); + final Path preCompressedDataPath = getCompressedRANSPath(CompressedDirname,inputTestDataPath, params.getOrder().ordinal()); + try (final InputStream inputStream = Files.newInputStream(inputTestDataPath); final InputStream preCompressedInputStream = Files.newInputStream(preCompressedDataPath); ) { @@ -221,7 +187,6 @@ private List getHtsCodecRANSTestFiles() throws IOException { path.getFileName().startsWith("qvar")) // q40+dir is excluded because the uncompressed size in the compressed file prefix does not match // the original file size. - // Q: why isn't q40+dir not included as it also startswith q4? .forEach(path -> paths.add(path)); return paths; } @@ -240,7 +205,7 @@ final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { // Given a test file name, map it to the corresponding rans compressed path final Path getCompressedRANSPath(final String ransType,final Path inputTestDataPath, int order) { - final String compressedFileName = String.format("r%s/%s.%s", ransType, inputTestDataPath.getFileName(), order); + final String compressedFileName = String.format("%s/%s.%s", ransType, inputTestDataPath.getFileName(), order); return inputTestDataPath.getParent().resolve(compressedFileName); } diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 5e7fd621ef..d6a0eea6b1 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -1,6 +1,12 @@ package htsjdk.samtools.cram.compression.rans; import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params; import htsjdk.samtools.util.TestUtil; import htsjdk.utils.TestNGUtils; import org.testng.Assert; @@ -51,15 +57,21 @@ public Object[][] getRansTestData() { }; } + public Object[][] getRansTestDataTinySmallLarge() { + return new Object[][]{ + { new TestCaseWrapper(randomBytesFromGeometricDistribution(100, 0.1)), 1, 100 }, // Tiny + { new TestCaseWrapper(randomBytesFromGeometricDistribution(1000, 0.01)), 4, 1000 }, // Small + { new TestCaseWrapper(randomBytesFromGeometricDistribution(100 * 1000 + 3, 0.01)), 100 * 1000 + 3 - 4, 100 * 1000 + 3 } // Large + }; + } + @DataProvider(name="rans4x8") public Object[][] getRans4x8Codecs() { final RANS4x8Encode rans4x8Encode = new RANS4x8Encode(); final RANS4x8Decode rans4x8Decode = new RANS4x8Decode(); - final RANS4x8Params rans4x8ParamsOrder0 = new RANS4x8Params(RANSParams.ORDER.ZERO); // RANS4x8 Order 0 - final RANS4x8Params rans4x8ParamsOrder1 = new RANS4x8Params(RANSParams.ORDER.ONE); // RANS4x8 Order 1 return new Object[][]{ - {rans4x8Encode, rans4x8Decode, rans4x8ParamsOrder0}, - {rans4x8Encode, rans4x8Decode, rans4x8ParamsOrder1} + {rans4x8Encode, rans4x8Decode, new RANS4x8Params(RANSParams.ORDER.ZERO)}, // RANS4x8 Order 0 + {rans4x8Encode, rans4x8Decode, new RANS4x8Params(RANSParams.ORDER.ONE)} // RANS4x8 Order 1 }; } @@ -67,98 +79,57 @@ public Object[][] getRans4x8Codecs() { public Object[][] getRansNx16Codecs() { final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); - final RANSNx16Params ransNx16ParamsFormatFlags0 = new RANSNx16Params(0); //RANSNx16 formatFlags(first byte) 0 // TODO: More formatFlags values i.e, combinations of bit flags will be added later return new Object[][]{ - {ransNx16Encode, ransNx16Decode, ransNx16ParamsFormatFlags0} + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0)} //RANSNx16 formatFlags(first byte) 0 }; } - @DataProvider(name = "allRansCodecs") public Object[][] getAllRansCodecs() { // concatenate RANS4x8 and RANSNx16 codecs return Stream.concat(Arrays.stream(getRans4x8Codecs()), Arrays.stream(getRansNx16Codecs())) .toArray(Object[][]::new); } - @DataProvider(name="rans4x8AndData") - public Object[][] getRans4x8AndData() { - // this data provider provides all the testdata for RANS4x8 order 0 and order 1 - return TestNGUtils.cartesianProduct(getRansTestData(), getRans4x8Codecs()); + @DataProvider(name="allRansAndData") + public Object[][] getAllRansAndData() { + // this data provider provides all the testdata for all of RANS codecs + return TestNGUtils.cartesianProduct(getRansTestData(), getAllRansCodecs()); } - @DataProvider(name="ransNx16AndData") - public Object[][] getRansNx16AndData() { - // this data provider provides all the testdata for RANSNx16 formatFlags = 0 - return TestNGUtils.cartesianProduct(getRansTestData(), getRansNx16Codecs()); + @DataProvider(name="allRansAndDataForTinySmallLarge") + public Object[][] getAllRansAndDataForTinySmallLarge() { + // this data provider provides Tiny, Small and Large testdata for all of RANS codecs + return TestNGUtils.cartesianProduct(getRansTestDataTinySmallLarge(), getAllRansCodecs()); } - @Test(dataProvider = "allRansCodecs") - public void testSizeRangeTiny( - final RANSEncode ransEncode, - final RANSDecode ransDecode, - final RANSParams params) { - for (int i = 0; i < 20; i++) { - final byte[] data = randomBytesFromGeometricDistribution(100, 0.1); - final ByteBuffer in = ByteBuffer.wrap(data); - for (int size = 1; size < data.length; size++) { - in.position(0); - in.limit(size); - ransRoundTrip(in, ransEncode, ransDecode, params); - } - } - } - - @Test(dataProvider = "allRansCodecs") - public void testSizeRangeSmall( + @Test(dataProvider = "allRansAndDataForTinySmallLarge") + public void testSizeRangeTinySmallLarge( + final TestCaseWrapper tc, + final Integer lowerLimit, + final Integer upperLimit, final RANSEncode ransEncode, final RANSDecode ransDecode, - final RANSParams params) { - final byte[] data = randomBytesFromGeometricDistribution(1000, 0.01); - final ByteBuffer in = ByteBuffer.wrap(data); - for (int size = 4; size < data.length; size++) { + final RANSParams params){ + final ByteBuffer in = ByteBuffer.wrap(tc.testArray); + for (int size = lowerLimit; size < upperLimit; size++) { in.position(0); in.limit(size); ransRoundTrip(in, ransEncode, ransDecode, params); } } - @Test(dataProvider = "allRansCodecs") - public void testLargeSize( - final RANSEncode ransEncode, - final RANSDecode ransDecode, - final RANSParams params) { - final int size = 100 * 1000 + 3; - final byte[] data = randomBytesFromGeometricDistribution(size, 0.01); - final ByteBuffer in = ByteBuffer.wrap(data); - for (int limit = size - 4; limit < size; limit++) { - in.position(0); - in.limit(limit); - ransRoundTrip(in, ransEncode, ransDecode, params); - } - } - @Test(dataProvider = "rans4x8") public void testRans4x8BuffersMeetBoundaryExpectations( final RANS4x8Encode ransEncode, final RANS4x8Decode ransDecode, final RANS4x8Params params) { final int size = 1001; - final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final ByteBuffer compressed = ransEncode.compress(raw, params); - Assert.assertFalse(raw.hasRemaining()); - Assert.assertEquals(raw.limit(), size); - Assert.assertEquals(compressed.position(), 0); + final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > 10); Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4); Assert.assertEquals(compressed.getInt(), size); - compressed.rewind(); - - final ByteBuffer uncompressed = ransDecode.uncompress(compressed,params); - Assert.assertFalse(compressed.hasRemaining()); - Assert.assertEquals(uncompressed.limit(), size); - Assert.assertEquals(uncompressed.position(), 0); } @Test(dataProvider = "ransNx16") @@ -167,11 +138,7 @@ public void testRansNx16BuffersMeetBoundaryExpectations( final RANSNx16Decode ransDecode, final RANSNx16Params params) { final int size = 1001; - final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final ByteBuffer compressed = ransEncode.compress(raw, params); - Assert.assertFalse(raw.hasRemaining()); - Assert.assertEquals(raw.limit(), size); - Assert.assertEquals(compressed.position(), 0); + final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty final int FormatFlags = compressed.get(); // first byte of compressed data is the formatFlags Assert.assertEquals(FormatFlags,params.getFormatFlags()); @@ -179,18 +146,12 @@ public void testRansNx16BuffersMeetBoundaryExpectations( if (!params.getNosz()){ Assert.assertEquals(Utils.readUint7(compressed), size); } - compressed.rewind(); - - final ByteBuffer uncompressed = ransDecode.uncompress(compressed,params); - Assert.assertFalse(compressed.hasRemaining()); - Assert.assertEquals(uncompressed.limit(), size); - Assert.assertEquals(uncompressed.position(), 0); } @Test(dataProvider = "rans4x8") public void testRans4x8Header( final RANS4x8Encode ransEncode, - final RANS4x8Decode ransDecode, + final RANS4x8Decode unused, final RANS4x8Params params) { final int size = 1000; final ByteBuffer data = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); @@ -206,7 +167,7 @@ public void testRans4x8Header( @Test(dataProvider = "ransNx16") public void testRansNx16Header( final RANSNx16Encode ransEncode, - final RANSNx16Decode ransDecode, + final RANSNx16Decode unused, final RANSNx16Params params) { final int size = 1000; final ByteBuffer data = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); @@ -219,17 +180,8 @@ public void testRansNx16Header( } } - @Test(dataProvider="rans4x8AndData") - public void testRANS4x8( - final TestCaseWrapper tc, - final RANSEncode ransEncode, - final RANSDecode ransDecode, - final RANSParams params) { - ransRoundTrip(ByteBuffer.wrap(tc.testArray), ransEncode, ransDecode, params); - } - - @Test(dataProvider="ransNx16AndData") - public void testRANSNx16( + @Test(dataProvider="allRansAndData") + public void testRANS( final TestCaseWrapper tc, final RANSEncode ransEncode, final RANSDecode ransDecode, @@ -254,6 +206,25 @@ private static void ransRoundTrip( Assert.assertFalse(uncompressed.hasRemaining()); } + public ByteBuffer ransBufferMeetBoundaryExpectations( + final int size, + final RANSEncode ransEncode, + final RANSDecode ransDecode, + final RANSParams params){ + // helper method for Boundary Expectations test + final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); + final ByteBuffer compressed = ransEncode.compress(raw, params); + final ByteBuffer uncompressed = ransDecode.uncompress(compressed,params); + Assert.assertFalse(compressed.hasRemaining()); + compressed.rewind(); + Assert.assertEquals(uncompressed.limit(), size); + Assert.assertEquals(uncompressed.position(), 0); + Assert.assertFalse(raw.hasRemaining()); + Assert.assertEquals(raw.limit(), size); + Assert.assertEquals(compressed.position(), 0); + return compressed; + } + private byte[] getNBytesWithValues(final int n, final BiFunction valueForIndex) { final byte[] data = new byte[n]; for (int i = 0; i < data.length; i++) { From 8582ab8774721838e1ca4a75668faff67aaab3f1 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 8 Mar 2022 10:40:56 -0500 Subject: [PATCH 05/76] filter out extra column from q40+dir file --- .../samtools/cram/CRAMCodecCorpusTest.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index 3476c516bb..d882a73efc 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -184,18 +184,27 @@ private List getHtsCodecRANSTestFiles() throws IOException { CRAMCodecCorpus.getHTSCodecsTestDataLocation().resolve("dat"), path -> path.getFileName().startsWith("q4") || path.getFileName().startsWith("q8") || - path.getFileName().startsWith("qvar")) - // q40+dir is excluded because the uncompressed size in the compressed file prefix does not match - // the original file size. + path.getFileName().startsWith("qvar") || + path.getFileName().startsWith("q40+dir")) .forEach(path -> paths.add(path)); return paths; } // the input files have embedded newlines that the test remove before round-tripping... final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { + // 1. filters new lines if any. + // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. + // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + int skip = 0; for (final byte b : rawBytes) { - if (b != '\n') { + if (b == '\t'){ + skip = 1; + } + if (b == '\n') { + skip = 0; + } + if (skip == 0 && b !='\n') { baos.write(b); } } From 0769ecc1ecef05b1d7a48213fa461107ec4286a4 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 18 Mar 2022 12:09:05 -0400 Subject: [PATCH 06/76] rans nx16 order 1 freq tables + refactor --- .../compression/rans/ArithmeticDecoder.java | 21 ++- .../cram/compression/rans/Constants.java | 8 +- .../samtools/cram/compression/rans/FC.java | 35 ---- .../compression/rans/RANSDecodingSymbol.java | 8 +- .../cram/compression/rans/RANSEncode.java | 2 + .../samtools/cram/compression/rans/Utils.java | 74 +++++--- .../cram/compression/rans/rans4x8/D04.java | 40 ++-- .../cram/compression/rans/rans4x8/D14.java | 20 +- .../cram/compression/rans/rans4x8/E04.java | 8 +- .../cram/compression/rans/rans4x8/E14.java | 8 +- .../rans/rans4x8/RANS4x8Decode.java | 38 ++-- .../rans/rans4x8/RANS4x8Encode.java | 53 +++--- .../cram/compression/rans/ransnx16/D0N.java | 8 +- .../cram/compression/rans/ransnx16/D1N.java | 75 ++++++++ .../cram/compression/rans/ransnx16/E0N.java | 2 +- .../cram/compression/rans/ransnx16/E1N.java | 93 ++++++++++ .../rans/ransnx16/RANSNx16Decode.java | 118 ++++++++++-- .../rans/ransnx16/RANSNx16Encode.java | 171 ++++++++++++++++-- .../cram/compression/rans/RansTest.java | 69 +++---- 19 files changed, 623 insertions(+), 228 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/FC.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java index 7f6249bebe..6988fb8df6 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java @@ -24,24 +24,29 @@ */ package htsjdk.samtools.cram.compression.rans; +import static htsjdk.samtools.cram.compression.rans.Constants.NUMBER_OF_SYMBOLS; + final public class ArithmeticDecoder { - public final FC[] fc = new FC[256]; + public final int[] freq = new int[NUMBER_OF_SYMBOLS]; + public final int[] cumulativeFreq = new int[NUMBER_OF_SYMBOLS]; // reverse lookup table - public byte[] R = new byte[Constants.TOTFREQ]; + public byte[] reverseLookup = new byte[Constants.TOTAL_FREQ]; public ArithmeticDecoder() { - for (int i = 0; i < 256; i++) { - fc[i] = new FC(); + for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { + freq[i] = 0; + cumulativeFreq[i] = 0; } } public void reset() { - for (int i = 0; i < 256; i++) { - fc[i].reset(); + for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { + freq[i] = 0; + cumulativeFreq[i] = 0; } - for (int i = 0; i < Constants.TOTFREQ; i++) { - R[i] = 0; + for (int i = 0; i < Constants.TOTAL_FREQ; i++) { + reverseLookup[i] = 0; } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java index 3cb6439481..2d35c60635 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java @@ -1,9 +1,9 @@ package htsjdk.samtools.cram.compression.rans; final public class Constants { - public static final int TF_SHIFT = 12; - public static final int TOTFREQ = (1 << TF_SHIFT); // 4096 - public static final int RANS_BYTE_L_4x8 = 1 << 23; - public static final int RANS_BYTE_L_Nx16 = 1 << 15; + public static final int TOTAL_FREQ_SHIFT = 12; + public static final int TOTAL_FREQ = (1 << TOTAL_FREQ_SHIFT); // 4096 + public static final int RANS_4x8_LOWER_BOUND = 1 << 23; + public static final int RANS_Nx16_LOWER_BOUND = 1 << 15; public static final int NUMBER_OF_SYMBOLS = 256; } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/FC.java b/src/main/java/htsjdk/samtools/cram/compression/rans/FC.java deleted file mode 100644 index d9cb3f66ad..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/FC.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2019 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -package htsjdk.samtools.cram.compression.rans; - -public final class FC { - public int F; - public int C; - - public void reset() { - F = C = 0; - } - -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java index 44ce0ad050..ebb7e9fabd 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java @@ -52,7 +52,7 @@ public int advanceSymbolStep(final int r, final int scaleBits) { // Advances in the bit stream by "popping" a single symbol with range start // "start" and frequency "freq". All frequencies are assumed to sum to // "1 << scale_bits". - public int advanceSymbol(final int rIn, final ByteBuffer byteBuffer, final int scaleBits) { + public int advanceSymbol4x8(final int rIn, final ByteBuffer byteBuffer, final int scaleBits) { final int mask = (1 << scaleBits) - 1; // s, x = D(x) @@ -60,11 +60,11 @@ public int advanceSymbol(final int rIn, final ByteBuffer byteBuffer, final int s r = freq * (r >> scaleBits) + (r & mask) - start; // re-normalize - if (r < Constants.RANS_BYTE_L_4x8) { + if (r < Constants.RANS_4x8_LOWER_BOUND) { do { final int b = 0xFF & byteBuffer.get(); r = (r << 8) | b; - } while (r < Constants.RANS_BYTE_L_4x8); + } while (r < Constants.RANS_4x8_LOWER_BOUND); } return r; @@ -78,7 +78,7 @@ public int advanceSymbolNx16(final int rIn, final ByteBuffer byteBuffer, final i r = freq * (r >> scaleBits) + (r & mask) - start; // re-normalize - if (r < (Constants.RANS_BYTE_L_Nx16)){ + if (r < (Constants.RANS_Nx16_LOWER_BOUND)){ int i = 0xFF & byteBuffer.get(); i |= (0xFF & byteBuffer.get())<<8; r = (r << 16) + i; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index 8d10dff149..43bf5ad46a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -46,4 +46,6 @@ protected ByteBuffer allocateOutputBuffer(final int inSize) { return outputBuffer; } + //TODO: add buildSymbols0 and buildSymbols1 + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index e4ce815a25..2e76ce9993 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -34,7 +34,7 @@ public static void reverse(final ByteBuffer byteBuffer) { } // Returns the current cumulative frequency (map it to a symbol yourself!) - public static int RANSDecodeGet(final int r, final int scaleBits) { + public static int RANSGetCumulativeFrequency(final int r, final int scaleBits) { return r & ((1 << scaleBits) - 1); } @@ -43,52 +43,52 @@ public static int RANSDecodeRenormalize4x8(int r, final ByteBuffer byteBuffer) { // re-normalize //rans4x8 - if (r < Constants.RANS_BYTE_L_4x8) { + if (r < Constants.RANS_4x8_LOWER_BOUND) { do { r = (r << 8) | (0xFF & byteBuffer.get()); - } while (r < Constants.RANS_BYTE_L_4x8); + } while (r < Constants.RANS_4x8_LOWER_BOUND); } return r; } public static int RANSDecodeRenormalizeNx16(int r, final ByteBuffer byteBuffer) { // ransNx16 - if (r < (Constants.RANS_BYTE_L_Nx16)) { + if (r < (Constants.RANS_Nx16_LOWER_BOUND)) { int i = (0xFF & byteBuffer.get()); - i |= (0xFF & byteBuffer.get()) <<8; + i |= (0xFF & byteBuffer.get()) << 8; r = (r << 16) | i; } return r; } - public static void writeUint7(int i, ByteBuffer cp){ + public static void writeUint7(int i, ByteBuffer cp) { int s = 0; int X = i; do { s += 7; X >>= 7; - }while (X>0); + } while (X > 0); do { - s -=7; + s -= 7; //writeByte - int s_ = (s > 0)?1:0; + int s_ = (s > 0) ? 1 : 0; cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); - } while (s>0); + } while (s > 0); } - public static int readUint7(ByteBuffer cp){ + public static int readUint7(ByteBuffer cp) { int i = 0; int c; do { //read byte c = cp.get(); - i = (i<<7) | (c & 0x7f); - } while((c & 0x80)!=0); + i = (i << 7) | (c & 0x7f); + } while ((c & 0x80) != 0); return i; } - public static int[] normaliseFrequenciesOrder0(final int[] F, final int bits) { + public static void normaliseFrequenciesOrder0(final int[] F, final int bits) { // Returns an array of normalised Frequencies, // such that the frequencies add up to 1<0)?(((long) (renormFreq) << 31) / T + (1 << 30) / T):0; // keep track of the symbol that has the maximum frequency // in the input Frequency array. @@ -108,36 +111,55 @@ public static int[] normaliseFrequenciesOrder0(final int[] F, final int bits) { // that the total normalized frequencies add up to "renormFreq" value. int m = 0; int M = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (m < F[j]) { - m = F[j]; - M = j; + for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { + if (m < F[symbol]) { + m = F[symbol]; + M = symbol; } } int fsum = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] == 0) { + for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { + if (F[symbol] == 0) { continue; } // using tr to normalize symbol frequencies such that their total = renormFreq - if ((F[j] = (int) ((F[j] * tr) >> 31)) == 0) { + if ((F[symbol] = (int) ((F[symbol] * tr) >> 31)) == 0) { // A non-zero symbol frequency should not be incorrectly set to 0. // If the calculated value is 0, change it to 1 - F[j] = 1; + F[symbol] = 1; } - fsum += F[j]; + fsum += F[symbol]; } // adjust the frequency of the symbol "M" such that // the sum of frequencies of all the symbols = renormFreq if (fsum < renormFreq) { F[M] += renormFreq - fsum; - } else if (fsum > renormFreq){ + } else if (fsum > renormFreq) { F[M] -= fsum - renormFreq; } - return F; + } + + public static void normaliseFrequenciesOrder1(final int[][] F, final int shift, final boolean constantShift) { + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F[Constants.NUMBER_OF_SYMBOLS][j]==0){ + continue; + } + int bitSize = shift; + if (!constantShift) { + // log2 N = Math.log(N)/Math.log(2) + bitSize = (int) Math.ceil(Math.log(F[Constants.NUMBER_OF_SYMBOLS][j]) / Math.log(2)); + + if (bitSize > shift) + bitSize = shift; + } + // special case -> if a symbol occurs only once and at the end of the input, + // then the order 0 freq table associated with it should have a frequency of 1 for symbol 0 + // i.e, F[sym][0] = 1 + normaliseFrequenciesOrder0(F[j], bitSize); + } } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java index 1048664fcc..c4b6120652 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java @@ -25,20 +25,20 @@ static void uncompress( final int out_sz = outBuffer.remaining(); final int out_end = (out_sz & ~3); for (int i = 0; i < out_end; i += 4) { - final byte c0 = D.R[Utils.RANSDecodeGet(rans0, Constants.TF_SHIFT)]; - final byte c1 = D.R[Utils.RANSDecodeGet(rans1, Constants.TF_SHIFT)]; - final byte c2 = D.R[Utils.RANSDecodeGet(rans2, Constants.TF_SHIFT)]; - final byte c3 = D.R[Utils.RANSDecodeGet(rans3, Constants.TF_SHIFT)]; + final byte c0 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + final byte c1 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + final byte c2 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + final byte c3 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans3, Constants.TOTAL_FREQ_SHIFT)]; outBuffer.put(i, c0); outBuffer.put(i + 1, c1); outBuffer.put(i + 2, c2); outBuffer.put(i + 3, c3); - rans0 = syms[0xFF & c0].advanceSymbolStep(rans0, Constants.TF_SHIFT); - rans1 = syms[0xFF & c1].advanceSymbolStep(rans1, Constants.TF_SHIFT); - rans2 = syms[0xFF & c2].advanceSymbolStep(rans2, Constants.TF_SHIFT); - rans3 = syms[0xFF & c3].advanceSymbolStep(rans3, Constants.TF_SHIFT); + rans0 = syms[0xFF & c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); + rans1 = syms[0xFF & c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); + rans2 = syms[0xFF & c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); + rans3 = syms[0xFF & c3].advanceSymbolStep(rans3, Constants.TOTAL_FREQ_SHIFT); rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); @@ -53,32 +53,32 @@ static void uncompress( break; case 1: - c = D.R[Utils.RANSDecodeGet(rans0, Constants.TF_SHIFT)]; - syms[0xFF & c].advanceSymbol(rans0, inBuffer, Constants.TF_SHIFT); + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); outBuffer.put(c); break; case 2: - c = D.R[Utils.RANSDecodeGet(rans0, Constants.TF_SHIFT)]; - syms[0xFF & c].advanceSymbol(rans0, inBuffer, Constants.TF_SHIFT); + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); outBuffer.put(c); - c = D.R[Utils.RANSDecodeGet(rans1, Constants.TF_SHIFT)]; - syms[0xFF & c].advanceSymbol(rans1, inBuffer, Constants.TF_SHIFT); + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans1, inBuffer, Constants.TOTAL_FREQ_SHIFT); outBuffer.put(c); break; case 3: - c = D.R[Utils.RANSDecodeGet(rans0, Constants.TF_SHIFT)]; - syms[0xFF & c].advanceSymbol(rans0, inBuffer, Constants.TF_SHIFT); + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); outBuffer.put(c); - c = D.R[Utils.RANSDecodeGet(rans1, Constants.TF_SHIFT)]; - syms[0xFF & c].advanceSymbol(rans1, inBuffer, Constants.TF_SHIFT); + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans1, inBuffer, Constants.TOTAL_FREQ_SHIFT); outBuffer.put(c); - c = D.R[Utils.RANSDecodeGet(rans2, Constants.TF_SHIFT)]; - syms[0xFF & c].advanceSymbol(rans2, inBuffer, Constants.TF_SHIFT); + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans2, inBuffer, Constants.TOTAL_FREQ_SHIFT); outBuffer.put(c); break; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java index 37c4650597..7385b92f22 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java @@ -32,20 +32,20 @@ static void uncompress( int l2 = 0; int l7 = 0; for (; i0 < isz4; i0++, i1++, i2++, i7++) { - final int c0 = 0xFF & D[l0].R[Utils.RANSDecodeGet(rans0, Constants.TF_SHIFT)]; - final int c1 = 0xFF & D[l1].R[Utils.RANSDecodeGet(rans1, Constants.TF_SHIFT)]; - final int c2 = 0xFF & D[l2].R[Utils.RANSDecodeGet(rans2, Constants.TF_SHIFT)]; - final int c7 = 0xFF & D[l7].R[Utils.RANSDecodeGet(rans7, Constants.TF_SHIFT)]; + final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; outBuffer.put(i0, (byte) c0); outBuffer.put(i1, (byte) c1); outBuffer.put(i2, (byte) c2); outBuffer.put(i7, (byte) c7); - rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TF_SHIFT); - rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TF_SHIFT); - rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TF_SHIFT); - rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TF_SHIFT); + rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); + rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); + rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); + rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); @@ -60,9 +60,9 @@ static void uncompress( // Remainder for (; i7 < out_sz; i7++) { - final int c7 = 0xFF & D[l7].R[Utils.RANSDecodeGet(rans7, Constants.TF_SHIFT)]; + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; outBuffer.put(i7, (byte) c7); - rans7 = syms[l7][c7].advanceSymbol(rans7, inBuffer, Constants.TF_SHIFT); + rans7 = syms[l7][c7].advanceSymbol4x8(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); l7 = c7; } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java index 10612475f1..09bd9b59c9 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java @@ -14,10 +14,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[] syms, int rans0, rans1, rans2, rans3; final ByteBuffer ptr = cp.slice(); - rans0 = Constants.RANS_BYTE_L_4x8; - rans1 = Constants.RANS_BYTE_L_4x8; - rans2 = Constants.RANS_BYTE_L_4x8; - rans3 = Constants.RANS_BYTE_L_4x8; + rans0 = Constants.RANS_4x8_LOWER_BOUND; + rans1 = Constants.RANS_4x8_LOWER_BOUND; + rans2 = Constants.RANS_4x8_LOWER_BOUND; + rans3 = Constants.RANS_4x8_LOWER_BOUND; int i; switch (i = (in_size & 3)) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java index e0234c89b4..198cda6a42 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java @@ -13,10 +13,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms final int in_size = inBuffer.remaining(); final int compressedBlobSize; int rans0, rans1, rans2, rans3; - rans0 = Constants.RANS_BYTE_L_4x8; - rans1 = Constants.RANS_BYTE_L_4x8; - rans2 = Constants.RANS_BYTE_L_4x8; - rans3 = Constants.RANS_BYTE_L_4x8; + rans0 = Constants.RANS_4x8_LOWER_BOUND; + rans1 = Constants.RANS_4x8_LOWER_BOUND; + rans2 = Constants.RANS_4x8_LOWER_BOUND; + rans3 = Constants.RANS_4x8_LOWER_BOUND; /* * Slicing is needed for buffer reversing later. diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index f43f6aeb00..b13e211fcb 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -70,18 +70,18 @@ private void readStatsOrder0(final ByteBuffer cp) { int x = 0; int j = cp.get() & 0xFF; do { - if ((decoder.fc[j].F = (cp.get() & 0xFF)) >= 128) { - decoder.fc[j].F &= ~128; - decoder.fc[j].F = ((decoder.fc[j].F & 127) << 8) | (cp.get() & 0xFF); + if ((decoder.freq[j] = (cp.get() & 0xFF)) >= 0x80) { + decoder.freq[j] &= ~0x80; + decoder.freq[j] = ((decoder.freq[j] & 0x7F) << 8) | (cp.get() & 0xFF); } - decoder.fc[j].C = x; + decoder.cumulativeFreq[j] = x; - decodingSymbols[j].set(decoder.fc[j].C, decoder.fc[j].F); + decodingSymbols[j].set(decoder.cumulativeFreq[j], decoder.freq[j]); /* Build reverse lookup table */ - Arrays.fill(decoder.R, x, x + decoder.fc[j].F, (byte) j); + Arrays.fill(decoder.reverseLookup, x, x + decoder.freq[j], (byte) j); - x += decoder.fc[j].F; + x += decoder.freq[j]; if (rle == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { j = cp.get() & 0xFF; @@ -94,7 +94,7 @@ private void readStatsOrder0(final ByteBuffer cp) { } } while (j != 0); - assert (x < Constants.TOTFREQ); + assert (x < Constants.TOTAL_FREQ); } private void readStatsOrder1(final ByteBuffer cp) { @@ -107,26 +107,26 @@ private void readStatsOrder1(final ByteBuffer cp) { int x = 0; int j = 0xFF & cp.get(); do { - if ((D[i].fc[j].F = (0xFF & cp.get())) >= 128) { - D[i].fc[j].F &= ~128; - D[i].fc[j].F = ((D[i].fc[j].F & 127) << 8) | (0xFF & cp.get()); + if ((D[i].freq[j] = (0xFF & cp.get())) >= 0x80) { + D[i].freq[j] &= ~0x80; + D[i].freq[j] = ((D[i].freq[j] & 0x7F) << 8) | (0xFF & cp.get()); } - D[i].fc[j].C = x; + D[i].cumulativeFreq[j] = x; - if (D[i].fc[j].F == 0) { - D[i].fc[j].F = Constants.TOTFREQ; + if (D[i].freq[j] == 0) { + D[i].freq[j] = Constants.TOTAL_FREQ; } decodingSymbols[i][j].set( - D[i].fc[j].C, - D[i].fc[j].F + D[i].cumulativeFreq[j], + D[i].freq[j] ); /* Build reverse lookup table */ - Arrays.fill(D[i].R, x, x + D[i].fc[j].F, (byte) j); + Arrays.fill(D[i].reverseLookup, x, x + D[i].freq[j], (byte) j); - x += D[i].fc[j].F; - assert (x <= Constants.TOTFREQ); + x += D[i].freq[j]; + assert (x <= Constants.TOTAL_FREQ); if (rle_j == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { j = (0xFF & cp.get()); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 097c332522..fb8c0e0b41 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -8,6 +8,8 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import static htsjdk.samtools.cram.compression.rans.Constants.NUMBER_OF_SYMBOLS; + public class RANS4x8Encode extends RANSEncode { private static final int ORDER_BYTE_LENGTH = 1; private static final int COMPRESSED_BYTE_LENGTH = 4; @@ -114,6 +116,7 @@ private static void writeCompressionPrefix( } private static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { + // TODO: remove duplicate code -use Utils.normalise here final int inSize = inBuffer.remaining(); // Compute statistics @@ -126,7 +129,7 @@ private static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { F[0xFF & inBuffer.get()]++; T++; } - final long tr = ((long) Constants.TOTFREQ << 31) / T + (1 << 30) / T; + final long tr = ((long) Constants.TOTAL_FREQ << 31) / T + (1 << 30) / T; // Normalise so T[i] == TOTFREQ // m is the maximum frequency value @@ -157,10 +160,10 @@ private static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { fsum++; // adjust the frequency of the symbol with maximum frequency to make sure that // the sum of frequencies of all the symbols = 4096 - if (fsum < Constants.TOTFREQ) { - F[M] += Constants.TOTFREQ - fsum; + if (fsum < Constants.TOTAL_FREQ) { + F[M] += Constants.TOTAL_FREQ - fsum; } else { - F[M] -= fsum - Constants.TOTFREQ; + F[M] -= fsum - Constants.TOTAL_FREQ; } assert (F[M] > 0); return F; @@ -189,7 +192,7 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { continue; } - final double p = ((double) Constants.TOTFREQ) / T[i]; + final double p = ((double) Constants.TOTAL_FREQ) / T[i]; int t2 = 0, m = 0, M = 0; for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F[i][j] == 0) @@ -206,49 +209,45 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { } t2++; - if (t2 < Constants.TOTFREQ) { - F[i][M] += Constants.TOTFREQ - t2; + if (t2 < Constants.TOTAL_FREQ) { + F[i][M] += Constants.TOTAL_FREQ - t2; } else { - F[i][M] -= t2 - Constants.TOTFREQ; + F[i][M] -= t2 - Constants.TOTAL_FREQ; } } return F; } - private RANSEncodingSymbol[] buildSymsOrder0(final int[] F) { - final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; + private void buildSymsOrder0(final int[] F) { + final RANSEncodingSymbol[] encodingSymbols = getEncodingSymbols()[0]; final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; // T = running sum of frequencies including the current symbol // F[j] = frequency of symbol "j" // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") - int T = 0; + int cumulativeFreq = 0; for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - C[j] = T; - T += F[j]; if (F[j] != 0) { //For each symbol, set start = cumulative frequency and freq = frequency - syms[j].set(C[j], F[j], Constants.TF_SHIFT); + encodingSymbols[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT); + cumulativeFreq += F[j]; } } - return syms; } - private RANSEncodingSymbol[][] buildSymsOrder1(final int[][] F) { - final RANSEncodingSymbol[][] syms = getEncodingSymbols(); + private void buildSymsOrder1(final int[][] F) { + final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { final int[] F_i_ = F[i]; - int x = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F_i_[j] != 0) { - syms[i][j].set(x, F_i_[j], Constants.TF_SHIFT); - x += F_i_[j]; + int cumulativeFreq = 0; + for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { + if (F_i_[symbol] != 0) { + encodingSymbols[i][symbol].set(cumulativeFreq, F_i_[symbol], Constants.TOTAL_FREQ_SHIFT); + cumulativeFreq += F_i_[symbol]; } } } - - return syms; } private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { @@ -270,7 +269,7 @@ private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { // Note: maximum possible rle = 254 // rle requires atmost 1 byte if (rle == 0 && j != 0 && F[j - 1] != 0) { - for (rle = j + 1; rle < 256 && F[rle] != 0; rle++) + for (rle = j + 1; rle < NUMBER_OF_SYMBOLS && F[rle] != 0; rle++) ; rle -= j + 1; cp.put((byte) rle); @@ -318,7 +317,7 @@ private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) // FIXME: could use order-0 statistics to observe which alphabet // symbols are present and base RLE on that ordering instead. if (i != 0 && T[i - 1] != 0) { - for (rle_i = i + 1; rle_i < 256 && T[rle_i] != 0; rle_i++) + for (rle_i = i + 1; rle_i < NUMBER_OF_SYMBOLS && T[rle_i] != 0; rle_i++) ; rle_i -= i + 1; cp.put((byte) rle_i); @@ -336,7 +335,7 @@ private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) } else { cp.put((byte) j); if (rle_j == 0 && j != 0 && F_i_[j - 1] != 0) { - for (rle_j = j + 1; rle_j < 256 && F_i_[rle_j] != 0; rle_j++) + for (rle_j = j + 1; rle_j < NUMBER_OF_SYMBOLS && F_i_[rle_j] != 0; rle_j++) ; rle_j -= j + 1; cp.put((byte) rle_j); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java index 0a2b3f6fbe..bb311e52ed 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java @@ -33,9 +33,9 @@ static void uncompress( for (r=0; r0){ - byte symbol = D.R[Utils.RANSDecodeGet(rans[rev_idx], Constants.TF_SHIFT)]; - syms[0xFF & symbol].advanceSymbolNx16(rans[rev_idx], inBuffer, Constants.TF_SHIFT); + byte symbol = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans[rev_idx], Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & symbol].advanceSymbolNx16(rans[rev_idx], inBuffer, Constants.TOTAL_FREQ_SHIFT); outBuffer.put(symbol); remSize --; rev_idx ++; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java new file mode 100644 index 0000000000..c55cefa591 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java @@ -0,0 +1,75 @@ +package htsjdk.samtools.cram.compression.rans.ransnx16; + +import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; +import htsjdk.samtools.cram.compression.rans.Utils; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class D1N { + static void uncompress( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final ArithmeticDecoder[] D, + final RANSDecodingSymbol[][] syms, + final int Nway) { + + // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 + // TODO: Fails - unexpected symbol in the third iteration of the for loop. + final int out_sz = outBuffer.remaining(); + int rans0, rans1, rans2, rans7; + inBuffer.order(ByteOrder.LITTLE_ENDIAN); + rans0 = inBuffer.getInt(); + rans1 = inBuffer.getInt(); + rans2 = inBuffer.getInt(); + rans7 = inBuffer.getInt(); + + final int isz4 = out_sz >> 2; + int i0 = 0; + int i1 = isz4; + int i2 = 2 * isz4; + int i7 = 3 * isz4; + int l0 = 0; + int l1 = 0; + int l2 = 0; + int l7 = 0; + for (; i0 < isz4; i0++, i1++, i2++, i7++) { + final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + + outBuffer.put(i0, (byte) c0); + outBuffer.put(i1, (byte) c1); + outBuffer.put(i2, (byte) c2); + outBuffer.put(i7, (byte) c7); + + rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); + rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); + rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); + rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); + + rans0 = Utils.RANSDecodeRenormalizeNx16(rans0, inBuffer); + rans1 = Utils.RANSDecodeRenormalizeNx16(rans1, inBuffer); + rans2 = Utils.RANSDecodeRenormalizeNx16(rans2, inBuffer); + rans7 = Utils.RANSDecodeRenormalizeNx16(rans7, inBuffer); + + l0 = c0; + l1 = c1; + l2 = c2; + l7 = c7; + } + + // Remainder + for (; i7 < out_sz; i7++) { + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + // should this be write uint7? + outBuffer.put(i7, (byte) c7); + rans7 = syms[l7][c7].advanceSymbolNx16(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); + l7 = c7; + } + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java index 18f3c405b4..bbd62c1e0a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java @@ -21,7 +21,7 @@ static int compress( for (r=0; r> 2; + int i0 = isz4 - 2; + int i1 = 2 * isz4 - 2; + int i2 = 3 * isz4 - 2; + int i3 = 4 * isz4 - 2; + + int l0 = 0; + if (i0 + 1 >= 0) { + l0 = 0xFF & inBuffer.get(i0 + 1); + } + int l1 = 0; + if (i1 + 1 >= 0) { + l1 = 0xFF & inBuffer.get(i1 + 1); + } + int l2 = 0; + if (i2 + 1 >= 0) { + l2 = 0xFF & inBuffer.get(i2 + 1); + } + int l3; + + // Deal with the remainder + l3 = 0xFF & inBuffer.get(in_size - 1); + for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { + final int c3 = 0xFF & inBuffer.get(i3); + rans3 = syms[c3][l3].putSymbol(rans3, ptr); + l3 = c3; + } + + for (; i0 >= 0; i0--, i1--, i2--, i3--) { + final int c0 = 0xFF & inBuffer.get(i0); + final int c1 = 0xFF & inBuffer.get(i1); + final int c2 = 0xFF & inBuffer.get(i2); + final int c3 = 0xFF & inBuffer.get(i3); + + rans3 = syms[c3][l3].putSymbol(rans3, ptr); + rans2 = syms[c2][l2].putSymbol(rans2, ptr); + rans1 = syms[c1][l1].putSymbol(rans1, ptr); + rans0 = syms[c0][l0].putSymbol(rans0, ptr); + + l0 = c0; + l1 = c1; + l2 = c2; + l3 = c3; + } + + rans3 = syms[0][l3].putSymbol(rans3, ptr); + rans2 = syms[0][l2].putSymbol(rans2, ptr); + rans1 = syms[0][l1].putSymbol(rans1, ptr); + rans0 = syms[0][l0].putSymbol(rans0, ptr); + + ptr.order(ByteOrder.BIG_ENDIAN); + ptr.putInt(rans3); + ptr.putInt(rans2); + ptr.putInt(rans1); + ptr.putInt(rans0); + ptr.flip(); + compressedBlobSize = ptr.limit(); + Utils.reverse(ptr); + /* + * Depletion of the in buffer cannot be confirmed because of the get(int + * position) method use during encoding, hence enforcing: + */ + inBuffer.position(inBuffer.limit()); + return compressedBlobSize; + } +} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index fa4755613b..0a1c402ea9 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -13,6 +13,7 @@ public class RANSNx16Decode extends RANSDecode{ private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params params) { if (inBuffer.remaining() == 0) { @@ -54,11 +55,11 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params pa else { switch (order){ case ZERO: - outBuffer = uncompressOrder0WayN(inBuffer,outBuffer, n_out,Nway); + outBuffer = uncompressOrder0WayN(inBuffer, outBuffer, n_out, Nway); + break; + case ONE: + outBuffer = uncompressOrder1WayN(inBuffer, outBuffer, n_out, Nway); break; -// case ONE: -// uncompressOrder1WayN(inBuffer,n_out, Nway); -// break; default: throw new RuntimeException("Unknown rANS order: " + order); } @@ -66,28 +67,70 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params pa return outBuffer; } - private ByteBuffer uncompressOrder0WayN(final ByteBuffer inBuffer, final ByteBuffer outBuffer,final int n_out,final int Nway) { + private ByteBuffer uncompressOrder0WayN( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int n_out, + final int Nway) { + // read the frequency table, get the normalised frequencies and use it to set the RANSDecodingSymbols - readStatsOrder0(inBuffer); + readFrequencyTableOrder0(inBuffer); + // uncompress using Nway rans states D0N.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer,n_out,Nway); return outBuffer; } - private void readStatsOrder0( + private ByteBuffer uncompressOrder1WayN( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int n_out, + final int Nway) { + + // TODO: does not work as expected. Need to fix! + // read the first byte and calculate the bit shift + int frequencyTableFirstByte = (inBuffer.get() & 0xFF); + int shift = frequencyTableFirstByte >> 4; + boolean optionalCompressFlag = ((frequencyTableFirstByte & FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK)!=0); + ByteBuffer freqTableSource; + if (optionalCompressFlag) { + + // if optionalCompressFlag is true, the frequency table was compressed using RANS Nx16, N=4 + final int uncompressedLength = Utils.readUint7(inBuffer); + final int compressedLength = Utils.readUint7(inBuffer); + byte[] compressedFreqTable = new byte[compressedLength]; + + // read compressedLength bytes into compressedFreqTable byte array + inBuffer.get(compressedFreqTable,0,compressedLength); + + // decode the compressedFreqTable to get the uncompressedFreqTable + freqTableSource = ByteBuffer.allocate(uncompressedLength); + ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); + compressedFrequencyTableBuffer.order(ByteOrder.LITTLE_ENDIAN); + uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,4); + } + else { + freqTableSource = inBuffer; + } + readFrequencyTableOrder1(freqTableSource, shift); + D1N.uncompress(inBuffer, outBuffer, getD(), getDecodingSymbols(), Nway); + return outBuffer; + } + + private void readFrequencyTableOrder0( final ByteBuffer cp) { final ArithmeticDecoder decoder = getD()[0]; final RANSDecodingSymbol[] decodingSymbols = getDecodingSymbols()[0]; // Use the Frequency table to set the values of F, C and R final int[] A = readAlphabet(cp); - int x = 0; + int cumulativeFreq = 0; final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; // read F, normalise F then calculate C and R for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (A[j] > 0) { - if ((F[j] = (cp.get() & 0xFF)) >= 128){ - F[j] &= ~128; + if ((F[j] = (cp.get() & 0xFF)) >= 0x80){ + F[j] &= ~0x80; F[j] = (( F[j] &0x7f) << 7) | (cp.get() & 0x7F); } } @@ -98,13 +141,58 @@ private void readStatsOrder0( // decoder.fc[j].F -> Frequency // decoder.fc[j].C -> Cumulative Frequency preceding the current symbol - decoder.fc[j].F = F[j]; - decoder.fc[j].C = x; - decodingSymbols[j].set(decoder.fc[j].C, decoder.fc[j].F); + decoder.freq[j] = F[j]; + decoder.cumulativeFreq[j] = cumulativeFreq; + decodingSymbols[j].set(decoder.cumulativeFreq[j], decoder.freq[j]); // R -> Reverse Lookup table - Arrays.fill(decoder.R, x, x + decoder.fc[j].F, (byte) j); - x += decoder.fc[j].F; + Arrays.fill(decoder.reverseLookup, cumulativeFreq, cumulativeFreq + decoder.freq[j], (byte) j); + cumulativeFreq += decoder.freq[j]; + } + } + } + + private void readFrequencyTableOrder1( + final ByteBuffer cp, + int shift) { + final int[][] F = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + final int[][] C = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + final ArithmeticDecoder[] D = getD(); + final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols(); + final int[] A = readAlphabet(cp); + + for (int i=0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (A[i] > 0) { + int run = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (A[j] > 0) { + if (run > 0) { + run--; + } else { + F[i][j] = Utils.readUint7(cp); + if (F[i][j] == 0){ + run = Utils.readUint7(cp); + } + } + } + } + + // For each symbol, normalise it's order 0 frequency table + Utils.normaliseFrequenciesOrder0(F[i],shift); + int cumulativeFreq=0; + + // set decoding symbols + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + D[i].freq[j]=F[i][j]; + D[i].cumulativeFreq[j]=cumulativeFreq; + decodingSymbols[i][j].set( + D[i].cumulativeFreq[j], + D[i].freq[j] + ); + /* Build reverse lookup table */ + Arrays.fill(D[i].reverseLookup, cumulativeFreq, cumulativeFreq + D[i].freq[j], (byte) j); + cumulativeFreq+=F[i][j]; + } } } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 7ed2cbe6c0..4318ba7660 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -7,6 +7,7 @@ import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; +import java.nio.ByteOrder; public class RANSNx16Encode extends RANSEncode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); @@ -54,8 +55,8 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params param switch (order) { case ZERO: return compressOrder0WayN(inBuffer, Nway, outBuffer); -// case ONE: -// return compressOrder1WayN(inBuffer, Nway, outBuffer); + case ONE: + return compressOrder1WayN(inBuffer, Nway, outBuffer); default: throw new RuntimeException("Unknown rANS order: " + order); } @@ -66,11 +67,7 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, final int[] F = buildFrequenciesOrder0(inBuffer); final ByteBuffer cp = outBuffer.slice(); int bitSize = (int) Math.ceil(Math.log(inSize) / Math.log(2)); - if (bitSize == 0) { - // TODO: check this! - // If there is just one symbol, bitsize = log (1)/log(2) = 0. - bitSize = 1; - } + if (bitSize > 12) { bitSize = 12; } @@ -94,6 +91,69 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, return outBuffer; } + private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final int Nway, final ByteBuffer outBuffer) { + //TODO: does not work as expected. Need to fix + final ByteBuffer cp = outBuffer.slice(); + final int[][] F = buildFrequenciesOrder1(inBuffer, Nway); + final int shift = 12; + + // normalise frequencies with a variable shift calculated + // using the minimum bit size that is needed to represent a frequency context array + Utils.normaliseFrequenciesOrder1(F, shift, false); + final int prefix_size = outBuffer.position(); + + // TODO: How is the buffer size calculated? js: 257*257*3+9 + ByteBuffer frequencyTable = allocateOutputBuffer(1); + ByteBuffer compressedFrequencyTable = allocateOutputBuffer(1); + + // uncompressed frequency table + final int uncompressedFrequencyTableSize = writeFrequenciesOrder1(frequencyTable,F); + frequencyTable.limit(uncompressedFrequencyTableSize); + frequencyTable.rewind(); + + // compressed frequency table using RANS Nx16 Order 0 + compressedFrequencyTable = compressOrder0WayN(frequencyTable,4,compressedFrequencyTable); + frequencyTable.rewind(); + int compressedFrequencyTableSize = compressedFrequencyTable.limit(); + + if (compressedFrequencyTableSize < uncompressedFrequencyTableSize) { + + // first byte + cp.put((byte) (1 | shift << 4 )); + Utils.writeUint7(uncompressedFrequencyTableSize,cp); + Utils.writeUint7(compressedFrequencyTableSize,cp); + + // write bytes from compressedFrequencyTable to cp + int i=0; + while (i 0) { + run--; + } else { + Utils.writeUint7(F[i][j],cp); + if (F[i][j] == 0) { + // Count how many more zero-freqs we have + for (int k = j+1; k < Constants.NUMBER_OF_SYMBOLS; k++) { + if (F[Constants.NUMBER_OF_SYMBOLS][k] == 0) { + continue; + } + if (F[i][k] == 0) { + run++; + } else { + break; + } + } + cp.put((byte) run); + } + } + } + } + return cp.position() - start; + } + private static void writeAlphabet(final ByteBuffer cp, final int[] F) { // Uses Run Length Encoding to write all the symbols whose frequency!=0 int rle = 0; @@ -160,25 +291,37 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) { cp.put((byte) 0); } - private RANSEncodingSymbol[] buildSymsOrder0(final int[] F) { + private void buildSymsOrder0(final int[] F) { final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; // updates the RANSEncodingSymbol array for all the symbols final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; // T = running sum of frequencies including the current symbol // F[j] = frequency of symbol "j" - // C[j] = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j") - int T = 0; + // cumulativeFreq = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j") + int cumulativeFreq = 0; for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - C[j] = T; - T += F[j]; if (F[j] != 0) { //For each symbol, set start = cumulative frequency and freq = frequency - syms[j].set(C[j], F[j], Constants.TF_SHIFT); + syms[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT); + cumulativeFreq += F[j]; + } + } + } + + private void buildSymsOrder1(final int[][] F) { + final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + final int[] F_i_ = F[i]; + int cumulativeFreq = 0; + for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { + if (F_i_[j] != 0) { + encodingSymbols[i][j].set(cumulativeFreq, F_i_[j], Constants.TOTAL_FREQ_SHIFT); + cumulativeFreq += F_i_[j]; + } } } - return syms; } } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index d6a0eea6b1..b4e6f002e0 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -27,9 +27,9 @@ public class RansTest extends HtsjdkTest { // Since some of our test cases use very large byte arrays, so enclose them in a wrapper class since // otherwise IntelliJ serializes them to strings for display in the test output, which is *super*-slow. - private static class TestCaseWrapper { + private static class TestDataEnvelope { public final byte[] testArray; - public TestCaseWrapper(final byte[] testdata) { + public TestDataEnvelope(final byte[] testdata) { this.testArray = testdata; } public String toString() { @@ -39,29 +39,30 @@ public String toString() { public Object[][] getRansTestData() { return new Object[][] { - { new TestCaseWrapper(new byte[]{}) }, - { new TestCaseWrapper(new byte[] {0}) }, - { new TestCaseWrapper(new byte[] {0, 1}) }, - { new TestCaseWrapper(new byte[] {0, 1, 2}) }, - { new TestCaseWrapper(new byte[] {0, 1, 2, 3}) }, - { new TestCaseWrapper(new byte[1000]) }, - { new TestCaseWrapper(getNBytesWithValues(1000, (n, index) -> (byte) 1)) }, - { new TestCaseWrapper(getNBytesWithValues(1000, (n, index) -> Byte.MIN_VALUE)) }, - { new TestCaseWrapper(getNBytesWithValues(1000, (n, index) -> Byte.MAX_VALUE)) }, - { new TestCaseWrapper(getNBytesWithValues(1000, (n, index) -> (byte) index.intValue())) }, - { new TestCaseWrapper(getNBytesWithValues(1000, (n, index) -> index < n / 2 ? (byte) 0 : (byte) 1)) }, - { new TestCaseWrapper(getNBytesWithValues(1000, (n, index) -> index < n % 2 ? (byte) 0 : (byte) 1)) }, - { new TestCaseWrapper(randomBytesFromGeometricDistribution(1000, 0.1)) }, - { new TestCaseWrapper(randomBytesFromGeometricDistribution(1000, 0.01)) }, - { new TestCaseWrapper(randomBytesFromGeometricDistribution(10 * 1000 * 1000 + 1, 0.01)) }, + { new TestDataEnvelope(new byte[]{}) }, + { new TestDataEnvelope(new byte[] {0}) }, + { new TestDataEnvelope(new byte[] {0, 1}) }, + { new TestDataEnvelope(new byte[] {0, 1, 2}) }, + { new TestDataEnvelope(new byte[] {0, 1, 2, 3}) }, + { new TestDataEnvelope(new byte[1000]) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> (byte) 1)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> Byte.MIN_VALUE)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> Byte.MAX_VALUE)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> (byte) index.intValue())) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> index < n / 2 ? (byte) 0 : (byte) 1)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> index < n % 2 ? (byte) 0 : (byte) 1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(10 * 1000 * 1000 + 1, 0.01)) }, }; } public Object[][] getRansTestDataTinySmallLarge() { return new Object[][]{ - { new TestCaseWrapper(randomBytesFromGeometricDistribution(100, 0.1)), 1, 100 }, // Tiny - { new TestCaseWrapper(randomBytesFromGeometricDistribution(1000, 0.01)), 4, 1000 }, // Small - { new TestCaseWrapper(randomBytesFromGeometricDistribution(100 * 1000 + 3, 0.01)), 100 * 1000 + 3 - 4, 100 * 1000 + 3 } // Large + // params: test data, lower limit, upper limit + { new TestDataEnvelope(randomBytesFromGeometricDistribution(100, 0.1)), 1, 100 }, // Tiny + { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01)), 4, 1000 }, // Small + { new TestDataEnvelope(randomBytesFromGeometricDistribution(100 * 1000 + 3, 0.01)), 100 * 1000 + 3 - 4, 100 * 1000 + 3 } // Large }; } @@ -94,28 +95,30 @@ public Object[][] getAllRansCodecs() { @DataProvider(name="allRansAndData") public Object[][] getAllRansAndData() { // this data provider provides all the testdata for all of RANS codecs - return TestNGUtils.cartesianProduct(getRansTestData(), getAllRansCodecs()); + // params: RANSEncode, RANSDecode, RANSParams, data + return TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansTestData()); } @DataProvider(name="allRansAndDataForTinySmallLarge") public Object[][] getAllRansAndDataForTinySmallLarge() { // this data provider provides Tiny, Small and Large testdata for all of RANS codecs - return TestNGUtils.cartesianProduct(getRansTestDataTinySmallLarge(), getAllRansCodecs()); + // params: RANSEncode, RANSDecode, RANSParams, data, lower limit, upper limit + return TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansTestDataTinySmallLarge()); } @Test(dataProvider = "allRansAndDataForTinySmallLarge") public void testSizeRangeTinySmallLarge( - final TestCaseWrapper tc, - final Integer lowerLimit, - final Integer upperLimit, final RANSEncode ransEncode, final RANSDecode ransDecode, - final RANSParams params){ - final ByteBuffer in = ByteBuffer.wrap(tc.testArray); + final RANSParams params, + final TestDataEnvelope td, + final Integer lowerLimit, + final Integer upperLimit){ + final ByteBuffer in = ByteBuffer.wrap(td.testArray); for (int size = lowerLimit; size < upperLimit; size++) { in.position(0); in.limit(size); - ransRoundTrip(in, ransEncode, ransDecode, params); + ransRoundTrip(ransEncode, ransDecode, params, in); } } @@ -182,18 +185,18 @@ public void testRansNx16Header( @Test(dataProvider="allRansAndData") public void testRANS( - final TestCaseWrapper tc, final RANSEncode ransEncode, final RANSDecode ransDecode, - final RANSParams params) { - ransRoundTrip(ByteBuffer.wrap(tc.testArray), ransEncode, ransDecode, params); + final RANSParams params, + final TestDataEnvelope td) { + ransRoundTrip(ransEncode, ransDecode, params, ByteBuffer.wrap(td.testArray)); } private static void ransRoundTrip( - final ByteBuffer data, final RANSEncode ransEncode, final RANSDecode ransDecode, - final RANSParams params) { + final RANSParams params, + final ByteBuffer data) { final ByteBuffer compressed = ransEncode.compress(data, params); final ByteBuffer uncompressed = ransDecode.uncompress(compressed, params); data.rewind(); From faf7c10dbd08e874d75ac72f37f525c675dbf1e0 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 18 Mar 2022 13:49:03 -0400 Subject: [PATCH 07/76] clean up --- .../compression/rans/RANSEncodingSymbol.java | 2 +- .../cram/compression/rans/rans4x8/E04.java | 14 +++++++------- .../cram/compression/rans/rans4x8/E14.java | 18 +++++++++--------- .../cram/compression/rans/ransnx16/D1N.java | 1 - .../cram/compression/rans/ransnx16/E1N.java | 18 +++++++++--------- 5 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java index a34dd4855d..8c3e21d2b6 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java @@ -66,7 +66,7 @@ public void set(final int start, final int freq, final int scaleBits) { rcpShift += 32; // Avoid the extra >>32 in RansEncPutSymbol } - public int putSymbol(int r, final ByteBuffer byteBuffer) { + public int putSymbol4x8(int r, final ByteBuffer byteBuffer) { ValidationUtils.validateArg(xMax != 0, "can't encode symbol with freq=0"); // re-normalize diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java index 09bd9b59c9..0a2ed371e2 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java @@ -22,11 +22,11 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[] syms, int i; switch (i = (in_size & 3)) { case 3: - rans2 = syms[0xFF & inBuffer.get(in_size - (i - 2))].putSymbol(rans2, ptr); + rans2 = syms[0xFF & inBuffer.get(in_size - (i - 2))].putSymbol4x8(rans2, ptr); case 2: - rans1 = syms[0xFF & inBuffer.get(in_size - (i - 1))].putSymbol(rans1, ptr); + rans1 = syms[0xFF & inBuffer.get(in_size - (i - 1))].putSymbol4x8(rans1, ptr); case 1: - rans0 = syms[0xFF & inBuffer.get(in_size - (i))].putSymbol(rans0, ptr); + rans0 = syms[0xFF & inBuffer.get(in_size - (i))].putSymbol4x8(rans0, ptr); case 0: break; } @@ -36,10 +36,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[] syms, final int c1 = 0xFF & inBuffer.get(i - 3); final int c0 = 0xFF & inBuffer.get(i - 4); - rans3 = syms[c3].putSymbol(rans3, ptr); - rans2 = syms[c2].putSymbol(rans2, ptr); - rans1 = syms[c1].putSymbol(rans1, ptr); - rans0 = syms[c0].putSymbol(rans0, ptr); + rans3 = syms[c3].putSymbol4x8(rans3, ptr); + rans2 = syms[c2].putSymbol4x8(rans2, ptr); + rans1 = syms[c1].putSymbol4x8(rans1, ptr); + rans0 = syms[c0].putSymbol4x8(rans0, ptr); } ptr.putInt(rans3); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java index 198cda6a42..3eaf7bcdcd 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java @@ -47,7 +47,7 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms l3 = 0xFF & inBuffer.get(in_size - 1); for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbol(rans3, ptr); + rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); l3 = c3; } @@ -57,10 +57,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms final int c2 = 0xFF & inBuffer.get(i2); final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbol(rans3, ptr); - rans2 = syms[c2][l2].putSymbol(rans2, ptr); - rans1 = syms[c1][l1].putSymbol(rans1, ptr); - rans0 = syms[c0][l0].putSymbol(rans0, ptr); + rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); + rans2 = syms[c2][l2].putSymbol4x8(rans2, ptr); + rans1 = syms[c1][l1].putSymbol4x8(rans1, ptr); + rans0 = syms[c0][l0].putSymbol4x8(rans0, ptr); l0 = c0; l1 = c1; @@ -68,10 +68,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms l3 = c3; } - rans3 = syms[0][l3].putSymbol(rans3, ptr); - rans2 = syms[0][l2].putSymbol(rans2, ptr); - rans1 = syms[0][l1].putSymbol(rans1, ptr); - rans0 = syms[0][l0].putSymbol(rans0, ptr); + rans3 = syms[0][l3].putSymbol4x8(rans3, ptr); + rans2 = syms[0][l2].putSymbol4x8(rans2, ptr); + rans1 = syms[0][l1].putSymbol4x8(rans1, ptr); + rans0 = syms[0][l0].putSymbol4x8(rans0, ptr); ptr.order(ByteOrder.BIG_ENDIAN); ptr.putInt(rans3); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java index c55cefa591..86ba94776e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java @@ -65,7 +65,6 @@ static void uncompress( // Remainder for (; i7 < out_sz; i7++) { final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; - // should this be write uint7? outBuffer.put(i7, (byte) c7); rans7 = syms[l7][c7].advanceSymbolNx16(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); l7 = c7; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java index 012f3befb4..2ba97f1a57 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java @@ -49,7 +49,7 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms l3 = 0xFF & inBuffer.get(in_size - 1); for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbol(rans3, ptr); + rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); l3 = c3; } @@ -59,10 +59,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms final int c2 = 0xFF & inBuffer.get(i2); final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbol(rans3, ptr); - rans2 = syms[c2][l2].putSymbol(rans2, ptr); - rans1 = syms[c1][l1].putSymbol(rans1, ptr); - rans0 = syms[c0][l0].putSymbol(rans0, ptr); + rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); + rans2 = syms[c2][l2].putSymbolNx16(rans2, ptr); + rans1 = syms[c1][l1].putSymbolNx16(rans1, ptr); + rans0 = syms[c0][l0].putSymbolNx16(rans0, ptr); l0 = c0; l1 = c1; @@ -70,10 +70,10 @@ static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms l3 = c3; } - rans3 = syms[0][l3].putSymbol(rans3, ptr); - rans2 = syms[0][l2].putSymbol(rans2, ptr); - rans1 = syms[0][l1].putSymbol(rans1, ptr); - rans0 = syms[0][l0].putSymbol(rans0, ptr); + rans3 = syms[0][l3].putSymbolNx16(rans3, ptr); + rans2 = syms[0][l2].putSymbolNx16(rans2, ptr); + rans1 = syms[0][l1].putSymbolNx16(rans1, ptr); + rans0 = syms[0][l0].putSymbolNx16(rans0, ptr); ptr.order(ByteOrder.BIG_ENDIAN); ptr.putInt(rans3); From 720357bea9529530ef36a8b1c297d2eeb2d03a31 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 20 Apr 2022 10:45:28 -0400 Subject: [PATCH 08/76] Update RAN test method names. --- .../htsjdk/samtools/cram/compression/rans/RansTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index b4e6f002e0..71bedea378 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -82,7 +82,7 @@ public Object[][] getRansNx16Codecs() { final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); // TODO: More formatFlags values i.e, combinations of bit flags will be added later return new Object[][]{ - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0)} //RANSNx16 formatFlags(first byte) 0 + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0)} ,//RANSNx16 formatFlags(first byte) 0 }; } @@ -107,7 +107,7 @@ public Object[][] getAllRansAndDataForTinySmallLarge() { } @Test(dataProvider = "allRansAndDataForTinySmallLarge") - public void testSizeRangeTinySmallLarge( + public void testRoundTripTinySmallLarge( final RANSEncode ransEncode, final RANSDecode ransDecode, final RANSParams params, @@ -184,7 +184,7 @@ public void testRansNx16Header( } @Test(dataProvider="allRansAndData") - public void testRANS( + public void testRoundTrip( final RANSEncode ransEncode, final RANSDecode ransDecode, final RANSParams params, From 03773c6efb6f05be7f8366831e741a82b3ca4dcc Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 20 Apr 2022 14:07:46 -0400 Subject: [PATCH 09/76] Remove unncessary params arg from uncompress methods (params are embedded in the streams). --- .../compression/RANSExternalCompressor.java | 3 +-- .../cram/compression/rans/RANSDecode.java | 4 ++-- .../rans/rans4x8/RANS4x8Decode.java | 4 ++-- .../rans/ransnx16/RANSNx16Decode.java | 6 +++--- .../rans/ransnx16/RANSNx16Params.java | 18 +++++++++--------- .../samtools/cram/CRAMCodecCorpusTest.java | 4 ++-- .../cram/compression/rans/RansTest.java | 4 ++-- 7 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java index 0898e2de21..848d7a2906 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java @@ -74,8 +74,7 @@ public byte[] compress(final byte[] data) { @Override public byte[] uncompress(byte[] data) { - final RANS4x8Params params = new RANS4x8Params(order); - final ByteBuffer buf = ransDecode.uncompress(ByteBuffer.wrap(data), params); + final ByteBuffer buf = ransDecode.uncompress(ByteBuffer.wrap(data)); return toByteArray(buf); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java index e38b6745f1..d20826f12e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java @@ -2,7 +2,7 @@ import java.nio.ByteBuffer; -public abstract class RANSDecode { +public abstract class RANSDecode { private ArithmeticDecoder[] D; private RANSDecodingSymbol[][] decodingSymbols; @@ -15,7 +15,7 @@ protected RANSDecodingSymbol[][] getDecodingSymbols() { return decodingSymbols; } - public abstract ByteBuffer uncompress(final ByteBuffer inBuffer, final T params); + public abstract ByteBuffer uncompress(final ByteBuffer inBuffer); // Lazy initialization of working memory for the decoder protected void initializeRANSDecoder() { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index b13e211fcb..a6dd499bd5 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -10,12 +10,12 @@ import java.nio.ByteOrder; import java.util.Arrays; -public class RANS4x8Decode extends RANSDecode { +public class RANS4x8Decode extends RANSDecode { private static final int RAW_BYTE_LENGTH = 4; private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); - public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANS4x8Params params) { + public ByteBuffer uncompress(final ByteBuffer inBuffer) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 0a1c402ea9..2b21c5baa0 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -11,11 +11,11 @@ import java.nio.ByteOrder; import java.util.Arrays; -public class RANSNx16Decode extends RANSDecode{ +public class RANSNx16Decode extends RANSDecode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; - public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params params) { + public ByteBuffer uncompress(final ByteBuffer inBuffer) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -26,7 +26,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, final RANSNx16Params pa // the first byte of compressed stream gives the formatFlags final int formatFlags = inBuffer.get(); - params.setFormatFlags(formatFlags); + final RANSNx16Params params = new RANSNx16Params(formatFlags); int n_out = params.getnOut(); final RANSParams.ORDER order = params.getOrder(); // Order-0 or Order-1 entropy coding final boolean x32 = params.getX32(); // Interleave N = 32 rANS states (else N = 4) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index f753e7e144..e4f1a7256b 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -4,6 +4,15 @@ public class RANSNx16Params implements RANSParams { + // RANS Nx16 Bit Flags + public static final int ORDER_FLAG_MASK = 0x01; + public static final int X32_FLAG_MASK = 0x04; + public static final int STRIPE_FLAG_MASK = 0x08; + public static final int NOSZ_FLAG_MASK = 0x10; + public static final int CAT_FLAG_MASK = 0x20; + public static final int RLE_FLAG_MASK = 0x40; + public static final int PACK_FLAG_MASK = 0x80; + // format is the first byte of the compressed data stream, // which consists of all the bit-flags detailing the type of transformations // and entropy encoders to be combined @@ -12,15 +21,6 @@ public class RANSNx16Params implements RANSParams { // To get the least significant 7 bits of format byte private static final int FORMAT_FLAG_MASK = 0x7f; - // RANS Nx16 Bit Flags - private static final int ORDER_FLAG_MASK = 0x01; - private static final int X32_FLAG_MASK = 0x04; - private static final int STRIPE_FLAG_MASK = 0x08; - private static final int NOSZ_FLAG_MASK = 0x10; - private static final int CAT_FLAG_MASK = 0x20; - private static final int RLE_FLAG_MASK = 0x40; - private static final int PACK_FLAG_MASK = 0x80; - // output length. Used as input param to RANS Nx16 uncompress method private final int nOut = 0; diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index d882a73efc..3026be066b 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -130,7 +130,7 @@ public void testRANSRoundTrip( params.toString(), uncompressedBytes.remaining(), compressedBytes.remaining())); - Assert.assertEquals(ransDecode.uncompress(compressedBytes, params), uncompressedBytes); + Assert.assertEquals(ransDecode.uncompress(compressedBytes), uncompressedBytes); } } @@ -169,7 +169,7 @@ public void testRANSPreCompressed( Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); // Use htsjdk to uncompress the precompressed file from htscodecs repo - final ByteBuffer htsjdkUncompressedBytes = ransDecode.uncompress(preCompressedInputBytes, params); + final ByteBuffer htsjdkUncompressedBytes = ransDecode.uncompress(preCompressedInputBytes); // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo Assert.assertEquals(htsjdkUncompressedBytes, inputBytes); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 71bedea378..b66ac99407 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -198,7 +198,7 @@ private static void ransRoundTrip( final RANSParams params, final ByteBuffer data) { final ByteBuffer compressed = ransEncode.compress(data, params); - final ByteBuffer uncompressed = ransDecode.uncompress(compressed, params); + final ByteBuffer uncompressed = ransDecode.uncompress(compressed); data.rewind(); while (data.hasRemaining()) { if (!uncompressed.hasRemaining()) { @@ -217,7 +217,7 @@ public ByteBuffer ransBufferMeetBoundaryExpectations( // helper method for Boundary Expectations test final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); final ByteBuffer compressed = ransEncode.compress(raw, params); - final ByteBuffer uncompressed = ransDecode.uncompress(compressed,params); + final ByteBuffer uncompressed = ransDecode.uncompress(compressed); Assert.assertFalse(compressed.hasRemaining()); compressed.rewind(); Assert.assertEquals(uncompressed.limit(), size); From 4a4194859094d88ff5020e967de7c7890b5de92c Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 20 Apr 2022 14:35:48 -0400 Subject: [PATCH 10/76] Remove unnecessary RANSNx16Params state. --- .../compression/rans/ransnx16/RANSNx16Decode.java | 5 +---- .../compression/rans/ransnx16/RANSNx16Params.java | 13 ------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 2b21c5baa0..a4f81f31dd 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -27,7 +27,6 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // the first byte of compressed stream gives the formatFlags final int formatFlags = inBuffer.get(); final RANSNx16Params params = new RANSNx16Params(formatFlags); - int n_out = params.getnOut(); final RANSParams.ORDER order = params.getOrder(); // Order-0 or Order-1 entropy coding final boolean x32 = params.getX32(); // Interleave N = 32 rANS states (else N = 4) final boolean stripe = params.getStripe(); //multiway interleaving of byte streams @@ -42,9 +41,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { final int Nway = (x32) ? 32 : 4; // if nosz is set, then uncompressed size is not recorded. - if (!nosz) { - n_out = Utils.readUint7(inBuffer); - } + int n_out = nosz ? 0 : Utils.readUint7(inBuffer); ByteBuffer outBuffer = ByteBuffer.allocate(n_out); // If CAT is set then, the input is uncompressed diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index e4f1a7256b..e4deb34a84 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -21,9 +21,6 @@ public class RANSNx16Params implements RANSParams { // To get the least significant 7 bits of format byte private static final int FORMAT_FLAG_MASK = 0x7f; - // output length. Used as input param to RANS Nx16 uncompress method - private final int nOut = 0; - public RANSNx16Params(int formatFlags) { this.formatFlags = formatFlags; } @@ -44,10 +41,6 @@ public int getFormatFlags(){ return formatFlags & FORMAT_FLAG_MASK; } - public void setFormatFlags(int formatFlags) { - this.formatFlags = formatFlags; - } - public boolean getX32(){ // Interleave N = 32 rANS states (else N = 4) return ((formatFlags & X32_FLAG_MASK)!=0); @@ -78,10 +71,4 @@ public boolean getPack(){ return ((formatFlags & PACK_FLAG_MASK)!=0); } - public int getnOut() { - // nOut is the length of uncompressed data - // used in uncompress method - return nOut; - } - } \ No newline at end of file From ba088c676d3d2f2127fa9802865482d11fef2e95 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 20 Apr 2022 14:52:46 -0400 Subject: [PATCH 11/76] Fix bug in the case where the cat bit is set. --- .../cram/compression/rans/ransnx16/RANSNx16Decode.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index a4f81f31dd..4ddac0c59c 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -47,7 +47,8 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // If CAT is set then, the input is uncompressed if (cat){ byte[] data = new byte[n_out]; - outBuffer = inBuffer.get( data,0, n_out); + inBuffer.get( data,0, n_out); + return ByteBuffer.wrap(data); } else { switch (order){ From ed68e3bd028d02fed929274c19b11c0856e5459a Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 20 Apr 2022 14:55:30 -0400 Subject: [PATCH 12/76] Reduce unncessary buffer allocation. --- .../cram/compression/rans/ransnx16/RANSNx16Decode.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 4ddac0c59c..21c33d646e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -42,7 +42,6 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // if nosz is set, then uncompressed size is not recorded. int n_out = nosz ? 0 : Utils.readUint7(inBuffer); - ByteBuffer outBuffer = ByteBuffer.allocate(n_out); // If CAT is set then, the input is uncompressed if (cat){ @@ -51,18 +50,19 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { return ByteBuffer.wrap(data); } else { + final ByteBuffer outBuffer = ByteBuffer.allocate(n_out); switch (order){ case ZERO: - outBuffer = uncompressOrder0WayN(inBuffer, outBuffer, n_out, Nway); + uncompressOrder0WayN(inBuffer, outBuffer, n_out, Nway); break; case ONE: - outBuffer = uncompressOrder1WayN(inBuffer, outBuffer, n_out, Nway); + uncompressOrder1WayN(inBuffer, outBuffer, n_out, Nway); break; default: throw new RuntimeException("Unknown rANS order: " + order); } + return outBuffer; } - return outBuffer; } private ByteBuffer uncompressOrder0WayN( From 0ce908080046123e7788e4e7ed25354801fa1f90 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 20 Apr 2022 15:32:20 -0400 Subject: [PATCH 13/76] Thread RANSNx16 params through RANSNx16 implementation. --- .../cram/compression/rans/ransnx16/D0N.java | 3 +- .../cram/compression/rans/ransnx16/D1N.java | 2 +- .../cram/compression/rans/ransnx16/E0N.java | 4 +- .../rans/ransnx16/RANSNx16Decode.java | 35 +++++---------- .../rans/ransnx16/RANSNx16Encode.java | 44 +++++++------------ .../rans/ransnx16/RANSNx16Params.java | 8 ++-- 6 files changed, 39 insertions(+), 57 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java index bb311e52ed..792b6dabee 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java @@ -14,7 +14,8 @@ static void uncompress( RANSDecodingSymbol[] syms, final ByteBuffer outBuffer, final int out_sz, - final int Nway) { + final RANSNx16Params ransNx16Params) { + final int Nway = ransNx16Params.getInterleaveSize(); // Nway parallel rans states. Nway = 4 or 32 final int[] rans = new int[Nway]; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java index 86ba94776e..5821d4e95f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java @@ -14,7 +14,7 @@ static void uncompress( final ByteBuffer outBuffer, final ArithmeticDecoder[] D, final RANSDecodingSymbol[][] syms, - final int Nway) { + final RANSNx16Params ransNx16Params) { // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 // TODO: Fails - unexpected symbol in the third iteration of the for loop. diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java index bbd62c1e0a..56db59a65f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java @@ -11,7 +11,9 @@ static int compress( final ByteBuffer inBuffer, final RANSEncodingSymbol[] syms, final ByteBuffer cp, - final int Nway) { + final RANSNx16Params ransNx16Params) { + final int Nway = ransNx16Params.getInterleaveSize(); + final int cdata_size; final int in_size = inBuffer.remaining(); final ByteBuffer ptr = cp.slice(); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 21c33d646e..1ff98a4229 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -4,7 +4,6 @@ import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; -import htsjdk.samtools.cram.compression.rans.RANSParams; import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; @@ -26,40 +25,30 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // the first byte of compressed stream gives the formatFlags final int formatFlags = inBuffer.get(); - final RANSNx16Params params = new RANSNx16Params(formatFlags); - final RANSParams.ORDER order = params.getOrder(); // Order-0 or Order-1 entropy coding - final boolean x32 = params.getX32(); // Interleave N = 32 rANS states (else N = 4) - final boolean stripe = params.getStripe(); //multiway interleaving of byte streams - final boolean nosz = params.getNosz(); // original size is not recorded - final boolean cat = params.getCAT(); // Data is uncompressed - final boolean rle = params.getRLE(); // Run length encoding, with runs and literals encoded separately - final boolean pack = params.getPack(); // Pack 2, 4, 8 or infinite symbols per byte + final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); // TODO: add methods to handle various flags - // N-way interleaving. If the NWay flag is set, use 32 way interleaving, else use 4 way - final int Nway = (x32) ? 32 : 4; - // if nosz is set, then uncompressed size is not recorded. - int n_out = nosz ? 0 : Utils.readUint7(inBuffer); + int n_out = ransNx16Params.getNosz() ? 0 : Utils.readUint7(inBuffer); // If CAT is set then, the input is uncompressed - if (cat){ + if (ransNx16Params.getCAT()){ byte[] data = new byte[n_out]; inBuffer.get( data,0, n_out); return ByteBuffer.wrap(data); } else { final ByteBuffer outBuffer = ByteBuffer.allocate(n_out); - switch (order){ + switch (ransNx16Params.getOrder()){ case ZERO: - uncompressOrder0WayN(inBuffer, outBuffer, n_out, Nway); + uncompressOrder0WayN(inBuffer, outBuffer, n_out, ransNx16Params); break; case ONE: - uncompressOrder1WayN(inBuffer, outBuffer, n_out, Nway); + uncompressOrder1WayN(inBuffer, outBuffer, n_out, ransNx16Params); break; default: - throw new RuntimeException("Unknown rANS order: " + order); + throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); } return outBuffer; } @@ -69,13 +58,13 @@ private ByteBuffer uncompressOrder0WayN( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int n_out, - final int Nway) { + final RANSNx16Params ransNx16Params) { // read the frequency table, get the normalised frequencies and use it to set the RANSDecodingSymbols readFrequencyTableOrder0(inBuffer); // uncompress using Nway rans states - D0N.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer,n_out,Nway); + D0N.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer, n_out, ransNx16Params); return outBuffer; } @@ -83,7 +72,7 @@ private ByteBuffer uncompressOrder1WayN( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int n_out, - final int Nway) { + final RANSNx16Params ransNx16Params) { // TODO: does not work as expected. Need to fix! // read the first byte and calculate the bit shift @@ -105,13 +94,13 @@ private ByteBuffer uncompressOrder1WayN( freqTableSource = ByteBuffer.allocate(uncompressedLength); ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); compressedFrequencyTableBuffer.order(ByteOrder.LITTLE_ENDIAN); - uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,4); + uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,ransNx16Params); } else { freqTableSource = inBuffer; } readFrequencyTableOrder1(freqTableSource, shift); - D1N.uncompress(inBuffer, outBuffer, getD(), getDecodingSymbols(), Nway); + D1N.uncompress(inBuffer, outBuffer, getD(), getDecodingSymbols(), ransNx16Params); return outBuffer; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 4318ba7660..227a9f2000 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -3,7 +3,6 @@ import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; -import htsjdk.samtools.cram.compression.rans.RANSParams; import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; @@ -13,35 +12,24 @@ public class RANSNx16Encode extends RANSEncode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); private static final int MINIMUM__ORDER_1_SIZE = 4; - public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params params) { + public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); - final int formatFlags = params.getFormatFlags(); + final int formatFlags = ransNx16Params.getFormatFlags(); outBuffer.put((byte) (formatFlags)); // one byte for formatFlags - final RANSParams.ORDER order = params.getOrder(); // Order-0 or Order-1 entropy coding - final boolean x32 = params.getX32(); // Interleave N = 32 rANS states (else N = 4) - final boolean stripe = params.getStripe(); //multiway interleaving of byte streams - final boolean nosz = params.getNosz(); // original size is not recorded - final boolean cat = params.getCAT(); // Data is uncompressed - final boolean rle = params.getRLE(); // Run length encoding, with runs and literals encoded separately - final boolean pack = params.getPack(); // Pack 2, 4, 8 or infinite symbols per byte // TODO: add methods to handle various flags - // N-way interleaving - final int Nway = (x32) ? 32 : 4; - - //stripe size - final int N = formatFlags>>8; - - if (!nosz) { + if (!ransNx16Params.getNosz()) { + // original size is not recorded int insize = inBuffer.remaining(); Utils.writeUint7(insize,outBuffer); } initializeRANSEncoder(); - if (cat) { + if (ransNx16Params.getCAT()) { + // Data is uncompressed outBuffer.put(inBuffer); return outBuffer; } @@ -49,20 +37,20 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params param if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { // TODO: check if this still applies for Nx16 or if there is a different limit // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 - return compressOrder0WayN(inBuffer, Nway, outBuffer); + return compressOrder0WayN(inBuffer, ransNx16Params, outBuffer); } - switch (order) { + switch (ransNx16Params.getOrder()) { case ZERO: - return compressOrder0WayN(inBuffer, Nway, outBuffer); + return compressOrder0WayN(inBuffer, ransNx16Params, outBuffer); case ONE: - return compressOrder1WayN(inBuffer, Nway, outBuffer); + return compressOrder1WayN(inBuffer, ransNx16Params, outBuffer); default: - throw new RuntimeException("Unknown rANS order: " + order); + throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); } } - private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, final ByteBuffer outBuffer) { + private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { final int inSize = inBuffer.remaining(); final int[] F = buildFrequenciesOrder0(inBuffer); final ByteBuffer cp = outBuffer.slice(); @@ -85,16 +73,16 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final int Nway, // update the RANS Encoding Symbols buildSymsOrder0(F); inBuffer.rewind(); - final int compressedBlobSize = E0N.compress(inBuffer, getEncodingSymbols()[0], cp, Nway); + final int compressedBlobSize = E0N.compress(inBuffer, getEncodingSymbols()[0], cp, ransNx16Params); outBuffer.rewind(); // set position to 0 outBuffer.limit(prefix_size + frequencyTableSize + compressedBlobSize); return outBuffer; } - private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final int Nway, final ByteBuffer outBuffer) { + private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { //TODO: does not work as expected. Need to fix final ByteBuffer cp = outBuffer.slice(); - final int[][] F = buildFrequenciesOrder1(inBuffer, Nway); + final int[][] F = buildFrequenciesOrder1(inBuffer, ransNx16Params.getInterleaveSize()); final int shift = 12; // normalise frequencies with a variable shift calculated @@ -112,7 +100,7 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final int Nway, frequencyTable.rewind(); // compressed frequency table using RANS Nx16 Order 0 - compressedFrequencyTable = compressOrder0WayN(frequencyTable,4,compressedFrequencyTable); + compressedFrequencyTable = compressOrder0WayN(frequencyTable, ransNx16Params, compressedFrequencyTable); frequencyTable.rewind(); int compressedFrequencyTableSize = compressedFrequencyTable.limit(); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index e4deb34a84..6d5e99745d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -6,7 +6,7 @@ public class RANSNx16Params implements RANSParams { // RANS Nx16 Bit Flags public static final int ORDER_FLAG_MASK = 0x01; - public static final int X32_FLAG_MASK = 0x04; + public static final int N32_FLAG_MASK = 0x04; public static final int STRIPE_FLAG_MASK = 0x08; public static final int NOSZ_FLAG_MASK = 0x10; public static final int CAT_FLAG_MASK = 0x20; @@ -41,9 +41,11 @@ public int getFormatFlags(){ return formatFlags & FORMAT_FLAG_MASK; } - public boolean getX32(){ + public int getInterleaveSize(){ // Interleave N = 32 rANS states (else N = 4) - return ((formatFlags & X32_FLAG_MASK)!=0); + //TODO: temporarily always return 4 + //return ((formatFlags & N32_FLAG_MASK) == 0) ? 32 : 4; + return 4; } public boolean getStripe(){ From 671d21f501047c07885bf1a2f42f4cfc5f7091e3 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Wed, 20 Apr 2022 15:45:15 -0400 Subject: [PATCH 14/76] Dont initialize RANSNx16 decoding structures unless we're going to use them. --- .../samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java | 2 +- .../samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 1ff98a4229..9e6254dc20 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -21,7 +21,6 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // For RANS decoding, the bytes are read in little endian from the input stream inBuffer.order(ByteOrder.LITTLE_ENDIAN); - initializeRANSDecoder(); // the first byte of compressed stream gives the formatFlags final int formatFlags = inBuffer.get(); @@ -39,6 +38,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { return ByteBuffer.wrap(data); } else { + initializeRANSDecoder(); final ByteBuffer outBuffer = ByteBuffer.allocate(n_out); switch (ransNx16Params.getOrder()){ case ZERO: diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 227a9f2000..aa64cc83d9 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -27,13 +27,13 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN int insize = inBuffer.remaining(); Utils.writeUint7(insize,outBuffer); } - initializeRANSEncoder(); if (ransNx16Params.getCAT()) { // Data is uncompressed outBuffer.put(inBuffer); return outBuffer; } + initializeRANSEncoder(); if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { // TODO: check if this still applies for Nx16 or if there is a different limit // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 From 9cd168a8283d523f659e5bdabbbfdff974fdfff9 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Fri, 22 Apr 2022 09:58:51 -0400 Subject: [PATCH 15/76] Move/inline RANS Nx16 D0N uncompress method into RANSNx16Decode. --- .../cram/compression/rans/ransnx16/D0N.java | 57 ------------------- .../rans/ransnx16/RANSNx16Decode.java | 42 +++++++++++++- 2 files changed, 41 insertions(+), 58 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java deleted file mode 100644 index 792b6dabee..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D0N.java +++ /dev/null @@ -1,57 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.ransnx16; - -import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; - -public class D0N { - static void uncompress( - final ByteBuffer inBuffer, - ArithmeticDecoder D, - RANSDecodingSymbol[] syms, - final ByteBuffer outBuffer, - final int out_sz, - final RANSNx16Params ransNx16Params) { - final int Nway = ransNx16Params.getInterleaveSize(); - - // Nway parallel rans states. Nway = 4 or 32 - final int[] rans = new int[Nway]; - - // c is the array of decoded symbols - final byte[] c = new byte[Nway]; - int r; - for (r=0; r0){ - byte symbol = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans[rev_idx], Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & symbol].advanceSymbolNx16(rans[rev_idx], inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(symbol); - remSize --; - rev_idx ++; - } - outBuffer.position(0); - } - -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 9e6254dc20..98d7cf3d0e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -64,7 +64,47 @@ private ByteBuffer uncompressOrder0WayN( readFrequencyTableOrder0(inBuffer); // uncompress using Nway rans states - D0N.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer, n_out, ransNx16Params); + //TODO: remove this temporary variable aliasing/staging + final ArithmeticDecoder D = getD()[0]; + final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; + final int Nway = ransNx16Params.getInterleaveSize(); + + // Nway parallel rans states. Nway = 4 or 32 + final int[] rans = new int[Nway]; + + // c is the array of decoded symbols + final byte[] c = new byte[Nway]; + int r; + for (r=0; r0){ + byte symbol = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans[rev_idx], Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & symbol].advanceSymbolNx16(rans[rev_idx], inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(symbol); + remSize --; + rev_idx ++; + } + outBuffer.position(0); + return outBuffer; } From 26889062e660c5813ec877f2665d21fcc3e5971d Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Mon, 25 Apr 2022 16:42:36 -0400 Subject: [PATCH 16/76] Move/inline RANS Nx16 D1N uncompress method into RANSNx16Decode. --- .../cram/compression/rans/ransnx16/D1N.java | 74 ------------------- .../rans/ransnx16/RANSNx16Decode.java | 58 ++++++++++++++- 2 files changed, 57 insertions(+), 75 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java deleted file mode 100644 index 5821d4e95f..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/D1N.java +++ /dev/null @@ -1,74 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.ransnx16; - -import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -public class D1N { - static void uncompress( - final ByteBuffer inBuffer, - final ByteBuffer outBuffer, - final ArithmeticDecoder[] D, - final RANSDecodingSymbol[][] syms, - final RANSNx16Params ransNx16Params) { - - // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 - // TODO: Fails - unexpected symbol in the third iteration of the for loop. - final int out_sz = outBuffer.remaining(); - int rans0, rans1, rans2, rans7; - inBuffer.order(ByteOrder.LITTLE_ENDIAN); - rans0 = inBuffer.getInt(); - rans1 = inBuffer.getInt(); - rans2 = inBuffer.getInt(); - rans7 = inBuffer.getInt(); - - final int isz4 = out_sz >> 2; - int i0 = 0; - int i1 = isz4; - int i2 = 2 * isz4; - int i7 = 3 * isz4; - int l0 = 0; - int l1 = 0; - int l2 = 0; - int l7 = 0; - for (; i0 < isz4; i0++, i1++, i2++, i7++) { - final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; - final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; - - outBuffer.put(i0, (byte) c0); - outBuffer.put(i1, (byte) c1); - outBuffer.put(i2, (byte) c2); - outBuffer.put(i7, (byte) c7); - - rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); - rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); - rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); - rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); - - rans0 = Utils.RANSDecodeRenormalizeNx16(rans0, inBuffer); - rans1 = Utils.RANSDecodeRenormalizeNx16(rans1, inBuffer); - rans2 = Utils.RANSDecodeRenormalizeNx16(rans2, inBuffer); - rans7 = Utils.RANSDecodeRenormalizeNx16(rans7, inBuffer); - - l0 = c0; - l1 = c1; - l2 = c2; - l7 = c7; - } - - // Remainder - for (; i7 < out_sz; i7++) { - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; - outBuffer.put(i7, (byte) c7); - rans7 = syms[l7][c7].advanceSymbolNx16(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); - l7 = c7; - } - } - -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 98d7cf3d0e..259242a5a8 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -140,7 +140,63 @@ private ByteBuffer uncompressOrder1WayN( freqTableSource = inBuffer; } readFrequencyTableOrder1(freqTableSource, shift); - D1N.uncompress(inBuffer, outBuffer, getD(), getDecodingSymbols(), ransNx16Params); + + final ArithmeticDecoder[] D = getD(); + final RANSDecodingSymbol[][] syms = getDecodingSymbols(); + + // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 + // TODO: Fails - unexpected symbol in the third iteration of the for loop. + final int out_sz = outBuffer.remaining(); + int rans0, rans1, rans2, rans7; + inBuffer.order(ByteOrder.LITTLE_ENDIAN); + rans0 = inBuffer.getInt(); + rans1 = inBuffer.getInt(); + rans2 = inBuffer.getInt(); + rans7 = inBuffer.getInt(); + + final int isz4 = out_sz >> 2; + int i0 = 0; + int i1 = isz4; + int i2 = 2 * isz4; + int i7 = 3 * isz4; + int l0 = 0; + int l1 = 0; + int l2 = 0; + int l7 = 0; + for (; i0 < isz4; i0++, i1++, i2++, i7++) { + final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + + outBuffer.put(i0, (byte) c0); + outBuffer.put(i1, (byte) c1); + outBuffer.put(i2, (byte) c2); + outBuffer.put(i7, (byte) c7); + + rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); + rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); + rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); + rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); + + rans0 = Utils.RANSDecodeRenormalizeNx16(rans0, inBuffer); + rans1 = Utils.RANSDecodeRenormalizeNx16(rans1, inBuffer); + rans2 = Utils.RANSDecodeRenormalizeNx16(rans2, inBuffer); + rans7 = Utils.RANSDecodeRenormalizeNx16(rans7, inBuffer); + + l0 = c0; + l1 = c1; + l2 = c2; + l7 = c7; + } + + // Remainder + for (; i7 < out_sz; i7++) { + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + outBuffer.put(i7, (byte) c7); + rans7 = syms[l7][c7].advanceSymbolNx16(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); + l7 = c7; + } return outBuffer; } From e01b08ec768396284d4ab5f866c32c4e55e5e4d0 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Mon, 25 Apr 2022 16:52:30 -0400 Subject: [PATCH 17/76] Move/inline RANS Nx16 E0N compress method into RANSNx16Encode. --- .../cram/compression/rans/ransnx16/E0N.java | 66 ------------------- .../rans/ransnx16/RANSNx16Encode.java | 55 +++++++++++++++- 2 files changed, 53 insertions(+), 68 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java deleted file mode 100644 index 56db59a65f..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E0N.java +++ /dev/null @@ -1,66 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.ransnx16; - -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; - -public class E0N { - static int compress( - final ByteBuffer inBuffer, - final RANSEncodingSymbol[] syms, - final ByteBuffer cp, - final RANSNx16Params ransNx16Params) { - final int Nway = ransNx16Params.getInterleaveSize(); - - final int cdata_size; - final int in_size = inBuffer.remaining(); - final ByteBuffer ptr = cp.slice(); - final int[] rans = new int[Nway]; - final int[] c = new int[Nway]; // c is the array of symbols - int r; - for (r=0; r0){ - - // encode remaining elements first - int symbol_ =0xFF & inBuffer.get(in_size - rev_idx); - rans[remSize - 1] = syms[symbol_].putSymbolNx16(rans[remSize - 1], ptr); - remSize --; - rev_idx ++; - } - int i; - - for (i = (in_size - (in_size%Nway)); i > 0; i -= Nway) { - for (r = Nway - 1; r >= 0; r--){ - - // encode using Nway parallel rans states. Nway = 4 or 32 - c[r] = 0xFF & inBuffer.get(i - (Nway - r)); - rans[r] = syms[c[r]].putSymbolNx16(rans[r], ptr); - } - } - for (i=Nway-1; i>=0; i--){ - ptr.putInt(rans[i]); - } - ptr.position(); - ptr.flip(); - cdata_size = ptr.limit(); - - // since the data is encoded in reverse order, - // reverse the compressed bytes, so that it is in correct order when uncompressed. - Utils.reverse(ptr); - inBuffer.position(inBuffer.limit()); - return cdata_size; - } - -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index aa64cc83d9..a7f1f2e946 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -73,9 +73,60 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16P // update the RANS Encoding Symbols buildSymsOrder0(F); inBuffer.rewind(); - final int compressedBlobSize = E0N.compress(inBuffer, getEncodingSymbols()[0], cp, ransNx16Params); + + //TODO: tmp staging glue + final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; + final int Nway = ransNx16Params.getInterleaveSize(); + + final int cdata_size; + final int in_size = inBuffer.remaining(); + final ByteBuffer ptr = cp.slice(); + final int[] rans = new int[Nway]; + final int[] c = new int[Nway]; // c is the array of symbols + int r; + for (r=0; r0){ + + // encode remaining elements first + int symbol_ =0xFF & inBuffer.get(in_size - rev_idx); + rans[remSize - 1] = syms[symbol_].putSymbolNx16(rans[remSize - 1], ptr); + remSize --; + rev_idx ++; + } + int i; + + for (i = (in_size - (in_size%Nway)); i > 0; i -= Nway) { + for (r = Nway - 1; r >= 0; r--){ + + // encode using Nway parallel rans states. Nway = 4 or 32 + c[r] = 0xFF & inBuffer.get(i - (Nway - r)); + rans[r] = syms[c[r]].putSymbolNx16(rans[r], ptr); + } + } + for (i=Nway-1; i>=0; i--){ + ptr.putInt(rans[i]); + } + ptr.position(); + ptr.flip(); + cdata_size = ptr.limit(); + + // since the data is encoded in reverse order, + // reverse the compressed bytes, so that it is in correct order when uncompressed. + Utils.reverse(ptr); + inBuffer.position(inBuffer.limit()); + outBuffer.rewind(); // set position to 0 - outBuffer.limit(prefix_size + frequencyTableSize + compressedBlobSize); + outBuffer.limit(prefix_size + frequencyTableSize + cdata_size); return outBuffer; } From 3c7ebb83f109e09c57c441bd42b4f94877993910 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Mon, 25 Apr 2022 16:58:45 -0400 Subject: [PATCH 18/76] Move/inline RANS Nx16 E1N compress method into RANSNx16Encode. --- .../cram/compression/rans/ransnx16/E1N.java | 93 ------------------- .../rans/ransnx16/RANSNx16Encode.java | 85 ++++++++++++++++- 2 files changed, 84 insertions(+), 94 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java deleted file mode 100644 index 2ba97f1a57..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/E1N.java +++ /dev/null @@ -1,93 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.ransnx16; - -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -public class E1N { - - // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 - // TODO: debug. - static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms, final ByteBuffer outBuffer) { - final int in_size = inBuffer.remaining(); - final int compressedBlobSize; - int rans0, rans1, rans2, rans3; - rans0 = Constants.RANS_Nx16_LOWER_BOUND; - rans1 = Constants.RANS_Nx16_LOWER_BOUND; - rans2 = Constants.RANS_Nx16_LOWER_BOUND; - rans3 = Constants.RANS_Nx16_LOWER_BOUND; - - /* - * Slicing is needed for buffer reversing later. - */ - final ByteBuffer ptr = outBuffer.slice(); - - final int isz4 = in_size >> 2; - int i0 = isz4 - 2; - int i1 = 2 * isz4 - 2; - int i2 = 3 * isz4 - 2; - int i3 = 4 * isz4 - 2; - - int l0 = 0; - if (i0 + 1 >= 0) { - l0 = 0xFF & inBuffer.get(i0 + 1); - } - int l1 = 0; - if (i1 + 1 >= 0) { - l1 = 0xFF & inBuffer.get(i1 + 1); - } - int l2 = 0; - if (i2 + 1 >= 0) { - l2 = 0xFF & inBuffer.get(i2 + 1); - } - int l3; - - // Deal with the remainder - l3 = 0xFF & inBuffer.get(in_size - 1); - for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { - final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); - l3 = c3; - } - - for (; i0 >= 0; i0--, i1--, i2--, i3--) { - final int c0 = 0xFF & inBuffer.get(i0); - final int c1 = 0xFF & inBuffer.get(i1); - final int c2 = 0xFF & inBuffer.get(i2); - final int c3 = 0xFF & inBuffer.get(i3); - - rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); - rans2 = syms[c2][l2].putSymbolNx16(rans2, ptr); - rans1 = syms[c1][l1].putSymbolNx16(rans1, ptr); - rans0 = syms[c0][l0].putSymbolNx16(rans0, ptr); - - l0 = c0; - l1 = c1; - l2 = c2; - l3 = c3; - } - - rans3 = syms[0][l3].putSymbolNx16(rans3, ptr); - rans2 = syms[0][l2].putSymbolNx16(rans2, ptr); - rans1 = syms[0][l1].putSymbolNx16(rans1, ptr); - rans0 = syms[0][l0].putSymbolNx16(rans0, ptr); - - ptr.order(ByteOrder.BIG_ENDIAN); - ptr.putInt(rans3); - ptr.putInt(rans2); - ptr.putInt(rans1); - ptr.putInt(rans0); - ptr.flip(); - compressedBlobSize = ptr.limit(); - Utils.reverse(ptr); - /* - * Depletion of the in buffer cannot be confirmed because of the get(int - * position) method use during encoding, hence enforcing: - */ - inBuffer.position(inBuffer.limit()); - return compressedBlobSize; - } -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index a7f1f2e946..e378bee6e2 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -186,7 +186,90 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16P // set encoding symbols buildSymsOrder1(F); inBuffer.rewind(); - final int compressedBlobSize = E1N.compress(inBuffer, getEncodingSymbols(), cp); + + // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 + // TODO: debug. + + //TODO: tmp staging + final RANSEncodingSymbol[][] syms = getEncodingSymbols(); + + final int in_size = inBuffer.remaining(); + final int compressedBlobSize; + int rans0, rans1, rans2, rans3; + rans0 = Constants.RANS_Nx16_LOWER_BOUND; + rans1 = Constants.RANS_Nx16_LOWER_BOUND; + rans2 = Constants.RANS_Nx16_LOWER_BOUND; + rans3 = Constants.RANS_Nx16_LOWER_BOUND; + + /* + * Slicing is needed for buffer reversing later. + */ + final ByteBuffer ptr = outBuffer.slice(); + + final int isz4 = in_size >> 2; + int i0 = isz4 - 2; + int i1 = 2 * isz4 - 2; + int i2 = 3 * isz4 - 2; + int i3 = 4 * isz4 - 2; + + int l0 = 0; + if (i0 + 1 >= 0) { + l0 = 0xFF & inBuffer.get(i0 + 1); + } + int l1 = 0; + if (i1 + 1 >= 0) { + l1 = 0xFF & inBuffer.get(i1 + 1); + } + int l2 = 0; + if (i2 + 1 >= 0) { + l2 = 0xFF & inBuffer.get(i2 + 1); + } + int l3; + + // Deal with the remainder + l3 = 0xFF & inBuffer.get(in_size - 1); + for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { + final int c3 = 0xFF & inBuffer.get(i3); + rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); + l3 = c3; + } + + for (; i0 >= 0; i0--, i1--, i2--, i3--) { + final int c0 = 0xFF & inBuffer.get(i0); + final int c1 = 0xFF & inBuffer.get(i1); + final int c2 = 0xFF & inBuffer.get(i2); + final int c3 = 0xFF & inBuffer.get(i3); + + rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); + rans2 = syms[c2][l2].putSymbolNx16(rans2, ptr); + rans1 = syms[c1][l1].putSymbolNx16(rans1, ptr); + rans0 = syms[c0][l0].putSymbolNx16(rans0, ptr); + + l0 = c0; + l1 = c1; + l2 = c2; + l3 = c3; + } + + rans3 = syms[0][l3].putSymbolNx16(rans3, ptr); + rans2 = syms[0][l2].putSymbolNx16(rans2, ptr); + rans1 = syms[0][l1].putSymbolNx16(rans1, ptr); + rans0 = syms[0][l0].putSymbolNx16(rans0, ptr); + + ptr.order(ByteOrder.BIG_ENDIAN); + ptr.putInt(rans3); + ptr.putInt(rans2); + ptr.putInt(rans1); + ptr.putInt(rans0); + ptr.flip(); + compressedBlobSize = ptr.limit(); + Utils.reverse(ptr); + /* + * Depletion of the in buffer cannot be confirmed because of the get(int + * position) method use during encoding, hence enforcing: + */ + inBuffer.position(inBuffer.limit()); + outBuffer.rewind(); outBuffer.limit(prefix_size + frequencyTableSize + compressedBlobSize); outBuffer.order(ByteOrder.LITTLE_ENDIAN); From 56f2b86f6eff0b9bea1ae8482b0013ba6d65d2d1 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Mon, 25 Apr 2022 17:09:11 -0400 Subject: [PATCH 19/76] Suppress spotbugs warnings. --- .../samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java | 4 +++- .../cram/compression/rans/ransnx16/RANSNx16Decode.java | 4 +++- .../cram/compression/rans/ransnx16/RANSNx16Encode.java | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index fb8c0e0b41..ae4eee05fc 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -221,7 +221,9 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { private void buildSymsOrder0(final int[] F) { final RANSEncodingSymbol[] encodingSymbols = getEncodingSymbols()[0]; - final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; + + // TODO: commented out to suppress spotBugs warning + //final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; // T = running sum of frequencies including the current symbol // F[j] = frequency of symbol "j" diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 259242a5a8..87c0db08bf 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -239,7 +239,9 @@ private void readFrequencyTableOrder1( final ByteBuffer cp, int shift) { final int[][] F = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; - final int[][] C = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + + // TODO: commented out to suppress spotBugs warning + //final int[][] C = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; final ArithmeticDecoder[] D = getD(); final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols(); final int[] A = readAlphabet(cp); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index e378bee6e2..5c3e2e8406 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -416,7 +416,9 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) { private void buildSymsOrder0(final int[] F) { final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; // updates the RANSEncodingSymbol array for all the symbols - final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; + + // TODO: commented out to suppress spotBugs warning + //final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; // T = running sum of frequencies including the current symbol // F[j] = frequency of symbol "j" From 55e290d2a2b93601ec581f03a85f8f70f1d62ff7 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 27 Apr 2022 16:27:02 -0400 Subject: [PATCH 20/76] Don't initialize RANS4x8 decoding structure unless we're going to use them. --- .../samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index a6dd499bd5..a4819c865b 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -19,7 +19,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } - initializeRANSDecoder(); + // first byte of compressed stream gives order final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get()); @@ -35,7 +35,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // uncompressed bytes length final int outSize = inBuffer.getInt(); final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); - + initializeRANSDecoder(); switch (order) { case ZERO: return uncompressOrder0Way4(inBuffer, outBuffer); From b89a222752dd69a82cbcff806bd6a7010d39f2a4 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 28 Apr 2022 10:25:11 -0400 Subject: [PATCH 21/76] Move/inline RANS 4x8 E04 compress method into RANS4x8Encode. --- .../cram/compression/rans/rans4x8/E04.java | 56 ------------------- .../rans/rans4x8/RANS4x8Encode.java | 51 +++++++++++++++-- 2 files changed, 45 insertions(+), 62 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java deleted file mode 100644 index 0a2ed371e2..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E04.java +++ /dev/null @@ -1,56 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.rans4x8; - -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; - -final class E04 { - - static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[] syms, final ByteBuffer cp) { - final int cdata_size; - final int in_size = inBuffer.remaining(); - int rans0, rans1, rans2, rans3; - final ByteBuffer ptr = cp.slice(); - - rans0 = Constants.RANS_4x8_LOWER_BOUND; - rans1 = Constants.RANS_4x8_LOWER_BOUND; - rans2 = Constants.RANS_4x8_LOWER_BOUND; - rans3 = Constants.RANS_4x8_LOWER_BOUND; - - int i; - switch (i = (in_size & 3)) { - case 3: - rans2 = syms[0xFF & inBuffer.get(in_size - (i - 2))].putSymbol4x8(rans2, ptr); - case 2: - rans1 = syms[0xFF & inBuffer.get(in_size - (i - 1))].putSymbol4x8(rans1, ptr); - case 1: - rans0 = syms[0xFF & inBuffer.get(in_size - (i))].putSymbol4x8(rans0, ptr); - case 0: - break; - } - for (i = (in_size & ~3); i > 0; i -= 4) { - final int c3 = 0xFF & inBuffer.get(i - 1); - final int c2 = 0xFF & inBuffer.get(i - 2); - final int c1 = 0xFF & inBuffer.get(i - 3); - final int c0 = 0xFF & inBuffer.get(i - 4); - - rans3 = syms[c3].putSymbol4x8(rans3, ptr); - rans2 = syms[c2].putSymbol4x8(rans2, ptr); - rans1 = syms[c1].putSymbol4x8(rans1, ptr); - rans0 = syms[c0].putSymbol4x8(rans0, ptr); - } - - ptr.putInt(rans3); - ptr.putInt(rans2); - ptr.putInt(rans1); - ptr.putInt(rans0); - ptr.flip(); - cdata_size = ptr.limit(); - // reverse the compressed bytes, so that they become in REVERSE order: - Utils.reverse(ptr); - inBuffer.position(inBuffer.limit()); - return cdata_size; - } -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index ae4eee05fc..d1e48864e1 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -1,9 +1,6 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSEncode; -import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; -import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.*; import htsjdk.utils.ValidationUtils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -64,10 +61,52 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final int frequencyTableSize = writeFrequenciesOrder0(cp, F); inBuffer.rewind(); - final int compressedBlobSize = E04.compress(inBuffer, getEncodingSymbols()[0], cp); + + final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; + final int cdata_size; + final int in_size = inBuffer.remaining(); + int rans0, rans1, rans2, rans3; + final ByteBuffer ptr = cp.slice(); + rans0 = Constants.RANS_4x8_LOWER_BOUND; + rans1 = Constants.RANS_4x8_LOWER_BOUND; + rans2 = Constants.RANS_4x8_LOWER_BOUND; + rans3 = Constants.RANS_4x8_LOWER_BOUND; + + int i; + switch (i = (in_size & 3)) { + case 3: + rans2 = syms[0xFF & inBuffer.get(in_size - (i - 2))].putSymbol4x8(rans2, ptr); + case 2: + rans1 = syms[0xFF & inBuffer.get(in_size - (i - 1))].putSymbol4x8(rans1, ptr); + case 1: + rans0 = syms[0xFF & inBuffer.get(in_size - (i))].putSymbol4x8(rans0, ptr); + case 0: + break; + } + for (i = (in_size & ~3); i > 0; i -= 4) { + final int c3 = 0xFF & inBuffer.get(i - 1); + final int c2 = 0xFF & inBuffer.get(i - 2); + final int c1 = 0xFF & inBuffer.get(i - 3); + final int c0 = 0xFF & inBuffer.get(i - 4); + + rans3 = syms[c3].putSymbol4x8(rans3, ptr); + rans2 = syms[c2].putSymbol4x8(rans2, ptr); + rans1 = syms[c1].putSymbol4x8(rans1, ptr); + rans0 = syms[c0].putSymbol4x8(rans0, ptr); + } + + ptr.putInt(rans3); + ptr.putInt(rans2); + ptr.putInt(rans1); + ptr.putInt(rans0); + ptr.flip(); + cdata_size = ptr.limit(); + // reverse the compressed bytes, so that they become in REVERSE order: + Utils.reverse(ptr); + inBuffer.position(inBuffer.limit()); // write the prefix at the beginning of the output buffer - writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inSize, frequencyTableSize, compressedBlobSize); + writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inSize, frequencyTableSize, cdata_size); return outBuffer; } From e23a7e3cf1b7ec0308bcb1672e8f20453c43135b Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 28 Apr 2022 11:22:55 -0400 Subject: [PATCH 22/76] Move/inline RANS 4x8 E14 compress method into RANS4x8Encode. --- .../cram/compression/rans/rans4x8/E14.java | 91 ------------------- .../rans/rans4x8/RANS4x8Encode.java | 79 +++++++++++++++- 2 files changed, 78 insertions(+), 92 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java deleted file mode 100644 index 3eaf7bcdcd..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/E14.java +++ /dev/null @@ -1,91 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.rans4x8; - -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -final class E14 { - - static int compress(final ByteBuffer inBuffer, final RANSEncodingSymbol[][] syms, final ByteBuffer outBuffer) { - final int in_size = inBuffer.remaining(); - final int compressedBlobSize; - int rans0, rans1, rans2, rans3; - rans0 = Constants.RANS_4x8_LOWER_BOUND; - rans1 = Constants.RANS_4x8_LOWER_BOUND; - rans2 = Constants.RANS_4x8_LOWER_BOUND; - rans3 = Constants.RANS_4x8_LOWER_BOUND; - - /* - * Slicing is needed for buffer reversing later. - */ - final ByteBuffer ptr = outBuffer.slice(); - - final int isz4 = in_size >> 2; - int i0 = isz4 - 2; - int i1 = 2 * isz4 - 2; - int i2 = 3 * isz4 - 2; - int i3 = 4 * isz4 - 2; - - int l0 = 0; - if (i0 + 1 >= 0) { - l0 = 0xFF & inBuffer.get(i0 + 1); - } - int l1 = 0; - if (i1 + 1 >= 0) { - l1 = 0xFF & inBuffer.get(i1 + 1); - } - int l2 = 0; - if (i2 + 1 >= 0) { - l2 = 0xFF & inBuffer.get(i2 + 1); - } - int l3; - - // Deal with the remainder - l3 = 0xFF & inBuffer.get(in_size - 1); - for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { - final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); - l3 = c3; - } - - for (; i0 >= 0; i0--, i1--, i2--, i3--) { - final int c0 = 0xFF & inBuffer.get(i0); - final int c1 = 0xFF & inBuffer.get(i1); - final int c2 = 0xFF & inBuffer.get(i2); - final int c3 = 0xFF & inBuffer.get(i3); - - rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); - rans2 = syms[c2][l2].putSymbol4x8(rans2, ptr); - rans1 = syms[c1][l1].putSymbol4x8(rans1, ptr); - rans0 = syms[c0][l0].putSymbol4x8(rans0, ptr); - - l0 = c0; - l1 = c1; - l2 = c2; - l3 = c3; - } - - rans3 = syms[0][l3].putSymbol4x8(rans3, ptr); - rans2 = syms[0][l2].putSymbol4x8(rans2, ptr); - rans1 = syms[0][l1].putSymbol4x8(rans1, ptr); - rans0 = syms[0][l0].putSymbol4x8(rans0, ptr); - - ptr.order(ByteOrder.BIG_ENDIAN); - ptr.putInt(rans3); - ptr.putInt(rans2); - ptr.putInt(rans1); - ptr.putInt(rans0); - ptr.flip(); - compressedBlobSize = ptr.limit(); - Utils.reverse(ptr); - /* - * Depletion of the in buffer cannot be confirmed because of the get(int - * position) method use during encoding, hence enforcing: - */ - inBuffer.position(inBuffer.limit()); - return compressedBlobSize; - } -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index d1e48864e1..ab21580a5a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -127,7 +127,84 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final int frequencyTableSize = writeFrequenciesOrder1(cp, F); inBuffer.rewind(); - final int compressedBlobSize = E14.compress(inBuffer, getEncodingSymbols(), cp); + + final RANSEncodingSymbol[][] syms = getEncodingSymbols(); + final int in_size = inBuffer.remaining(); + final int compressedBlobSize; + int rans0, rans1, rans2, rans3; + rans0 = Constants.RANS_4x8_LOWER_BOUND; + rans1 = Constants.RANS_4x8_LOWER_BOUND; + rans2 = Constants.RANS_4x8_LOWER_BOUND; + rans3 = Constants.RANS_4x8_LOWER_BOUND; + + /* + * Slicing is needed for buffer reversing later. + */ + final ByteBuffer ptr = cp.slice(); + + final int isz4 = in_size >> 2; + int i0 = isz4 - 2; + int i1 = 2 * isz4 - 2; + int i2 = 3 * isz4 - 2; + int i3 = 4 * isz4 - 2; + + int l0 = 0; + if (i0 + 1 >= 0) { + l0 = 0xFF & inBuffer.get(i0 + 1); + } + int l1 = 0; + if (i1 + 1 >= 0) { + l1 = 0xFF & inBuffer.get(i1 + 1); + } + int l2 = 0; + if (i2 + 1 >= 0) { + l2 = 0xFF & inBuffer.get(i2 + 1); + } + int l3; + + // Deal with the remainder + l3 = 0xFF & inBuffer.get(in_size - 1); + for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { + final int c3 = 0xFF & inBuffer.get(i3); + rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); + l3 = c3; + } + + for (; i0 >= 0; i0--, i1--, i2--, i3--) { + final int c0 = 0xFF & inBuffer.get(i0); + final int c1 = 0xFF & inBuffer.get(i1); + final int c2 = 0xFF & inBuffer.get(i2); + final int c3 = 0xFF & inBuffer.get(i3); + + rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); + rans2 = syms[c2][l2].putSymbol4x8(rans2, ptr); + rans1 = syms[c1][l1].putSymbol4x8(rans1, ptr); + rans0 = syms[c0][l0].putSymbol4x8(rans0, ptr); + + l0 = c0; + l1 = c1; + l2 = c2; + l3 = c3; + } + + rans3 = syms[0][l3].putSymbol4x8(rans3, ptr); + rans2 = syms[0][l2].putSymbol4x8(rans2, ptr); + rans1 = syms[0][l1].putSymbol4x8(rans1, ptr); + rans0 = syms[0][l0].putSymbol4x8(rans0, ptr); + + ptr.order(ByteOrder.BIG_ENDIAN); + ptr.putInt(rans3); + ptr.putInt(rans2); + ptr.putInt(rans1); + ptr.putInt(rans0); + ptr.flip(); + compressedBlobSize = ptr.limit(); + Utils.reverse(ptr); + /* + * Depletion of the in buffer cannot be confirmed because of the get(int + * position) method use during encoding, hence enforcing: + */ + inBuffer.position(inBuffer.limit()); // write the prefix at the beginning of the output buffer writeCompressionPrefix(RANSParams.ORDER.ONE, outBuffer, inSize, frequencyTableSize, compressedBlobSize); From 0b3fd27145656fb60396389383b35426a232194e Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 28 Apr 2022 11:33:12 -0400 Subject: [PATCH 23/76] Move/inline RANS 4x8 D04 uncompress method into RANS4x8Decode. --- .../cram/compression/rans/rans4x8/D04.java | 88 ------------------- .../rans/rans4x8/RANS4x8Decode.java | 74 +++++++++++++++- .../rans/rans4x8/RANS4x8Encode.java | 6 +- 3 files changed, 78 insertions(+), 90 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java deleted file mode 100644 index c4b6120652..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D04.java +++ /dev/null @@ -1,88 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.rans4x8; - -import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; - -final class D04 { - // TODO: move this method to RANS4x8Decode. Have the common code between D04 and D0N in utils class. - // same applies for other compress and uncompress methods. - - static void uncompress( - final ByteBuffer inBuffer, - final ArithmeticDecoder D, - final RANSDecodingSymbol[] syms, - final ByteBuffer outBuffer) { - int rans0, rans1, rans2, rans3; - rans0 = inBuffer.getInt(); - rans1 = inBuffer.getInt(); - rans2 = inBuffer.getInt(); - rans3 = inBuffer.getInt(); - - final int out_sz = outBuffer.remaining(); - final int out_end = (out_sz & ~3); - for (int i = 0; i < out_end; i += 4) { - final byte c0 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - final byte c1 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; - final byte c2 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; - final byte c3 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans3, Constants.TOTAL_FREQ_SHIFT)]; - - outBuffer.put(i, c0); - outBuffer.put(i + 1, c1); - outBuffer.put(i + 2, c2); - outBuffer.put(i + 3, c3); - - rans0 = syms[0xFF & c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); - rans1 = syms[0xFF & c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); - rans2 = syms[0xFF & c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); - rans3 = syms[0xFF & c3].advanceSymbolStep(rans3, Constants.TOTAL_FREQ_SHIFT); - - rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); - rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); - rans2 = Utils.RANSDecodeRenormalize4x8(rans2, inBuffer); - rans3 = Utils.RANSDecodeRenormalize4x8(rans3, inBuffer); - } - - outBuffer.position(out_end); - byte c; - switch (out_sz & 3) { - case 0: - break; - - case 1: - c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(c); - break; - - case 2: - c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(c); - - c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & c].advanceSymbol4x8(rans1, inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(c); - break; - - case 3: - c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(c); - - c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & c].advanceSymbol4x8(rans1, inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(c); - - c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & c].advanceSymbol4x8(rans2, inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(c); - break; - } - - outBuffer.position(0); - } -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index a4819c865b..25d6a79b74 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -5,6 +5,7 @@ import htsjdk.samtools.cram.compression.rans.RANSDecode; import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -51,7 +52,78 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols readStatsOrder0(inBuffer); - D04.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer); +// D04.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer); + final ArithmeticDecoder D = getD()[0]; + final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; + + int rans0, rans1, rans2, rans3; + rans0 = inBuffer.getInt(); + rans1 = inBuffer.getInt(); + rans2 = inBuffer.getInt(); + rans3 = inBuffer.getInt(); + + final int out_sz = outBuffer.remaining(); + final int out_end = (out_sz & ~3); + for (int i = 0; i < out_end; i += 4) { + final byte c0 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + final byte c1 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + final byte c2 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + final byte c3 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans3, Constants.TOTAL_FREQ_SHIFT)]; + + outBuffer.put(i, c0); + outBuffer.put(i + 1, c1); + outBuffer.put(i + 2, c2); + outBuffer.put(i + 3, c3); + + rans0 = syms[0xFF & c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); + rans1 = syms[0xFF & c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); + rans2 = syms[0xFF & c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); + rans3 = syms[0xFF & c3].advanceSymbolStep(rans3, Constants.TOTAL_FREQ_SHIFT); + + rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); + rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); + rans2 = Utils.RANSDecodeRenormalize4x8(rans2, inBuffer); + rans3 = Utils.RANSDecodeRenormalize4x8(rans3, inBuffer); + } + + outBuffer.position(out_end); + byte c; + switch (out_sz & 3) { + case 0: + break; + + case 1: + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(c); + break; + + case 2: + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(c); + + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans1, inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(c); + break; + + case 3: + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans0, inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(c); + + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans1, inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(c); + + c = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & c].advanceSymbol4x8(rans2, inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(c); + break; + } + + outBuffer.position(0); return outBuffer; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index ab21580a5a..f526417343 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -1,6 +1,10 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; -import htsjdk.samtools.cram.compression.rans.*; +import htsjdk.samtools.cram.compression.rans.Constants; +import htsjdk.samtools.cram.compression.rans.RANSEncode; +import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; +import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.Utils; import htsjdk.utils.ValidationUtils; import java.nio.ByteBuffer; import java.nio.ByteOrder; From e53f109509e56e167de5aa1eb7719b0dc204f00a Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 28 Apr 2022 11:41:44 -0400 Subject: [PATCH 24/76] Move/inline RANS 4x8 D14 uncompress method into RANS4x8Decode. --- .../cram/compression/rans/rans4x8/D14.java | 69 ------------------- .../rans/rans4x8/RANS4x8Decode.java | 61 ++++++++++++++-- 2 files changed, 57 insertions(+), 73 deletions(-) delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java deleted file mode 100644 index 7385b92f22..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/D14.java +++ /dev/null @@ -1,69 +0,0 @@ -package htsjdk.samtools.cram.compression.rans.rans4x8; - -import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; -import htsjdk.samtools.cram.compression.rans.Constants; -import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; -import htsjdk.samtools.cram.compression.rans.Utils; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -final class D14 { - static void uncompress( - final ByteBuffer inBuffer, - final ByteBuffer outBuffer, - final ArithmeticDecoder[] D, - final RANSDecodingSymbol[][] syms) { - final int out_sz = outBuffer.remaining(); - int rans0, rans1, rans2, rans7; - inBuffer.order(ByteOrder.LITTLE_ENDIAN); - rans0 = inBuffer.getInt(); - rans1 = inBuffer.getInt(); - rans2 = inBuffer.getInt(); - rans7 = inBuffer.getInt(); - - final int isz4 = out_sz >> 2; - int i0 = 0; - int i1 = isz4; - int i2 = 2 * isz4; - int i7 = 3 * isz4; - int l0 = 0; - int l1 = 0; - int l2 = 0; - int l7 = 0; - for (; i0 < isz4; i0++, i1++, i2++, i7++) { - final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; - final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; - - outBuffer.put(i0, (byte) c0); - outBuffer.put(i1, (byte) c1); - outBuffer.put(i2, (byte) c2); - outBuffer.put(i7, (byte) c7); - - rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); - rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); - rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); - rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); - - rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); - rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); - rans2 = Utils.RANSDecodeRenormalize4x8(rans2, inBuffer); - rans7 = Utils.RANSDecodeRenormalize4x8(rans7, inBuffer); - - l0 = c0; - l1 = c1; - l2 = c2; - l7 = c7; - } - - // Remainder - for (; i7 < out_sz; i7++) { - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; - outBuffer.put(i7, (byte) c7); - rans7 = syms[l7][c7].advanceSymbol4x8(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); - l7 = c7; - } - } -} diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 25d6a79b74..5056e857e6 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -52,7 +52,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols readStatsOrder0(inBuffer); -// D04.uncompress(inBuffer, getD()[0], getDecodingSymbols()[0], outBuffer); + final ArithmeticDecoder D = getD()[0]; final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; @@ -127,10 +127,63 @@ private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuf return outBuffer; } - private ByteBuffer uncompressOrder1Way4(final ByteBuffer in, final ByteBuffer outBuffer) { + private ByteBuffer uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols - readStatsOrder1(in); - D14.uncompress(in, outBuffer, getD(), getDecodingSymbols()); + readStatsOrder1(inBuffer); + + final ArithmeticDecoder[] D = getD(); + final RANSDecodingSymbol[][] syms = getDecodingSymbols(); + final int out_sz = outBuffer.remaining(); + int rans0, rans1, rans2, rans7; + inBuffer.order(ByteOrder.LITTLE_ENDIAN); + rans0 = inBuffer.getInt(); + rans1 = inBuffer.getInt(); + rans2 = inBuffer.getInt(); + rans7 = inBuffer.getInt(); + + final int isz4 = out_sz >> 2; + int i0 = 0; + int i1 = isz4; + int i2 = 2 * isz4; + int i7 = 3 * isz4; + int l0 = 0; + int l1 = 0; + int l2 = 0; + int l7 = 0; + for (; i0 < isz4; i0++, i1++, i2++, i7++) { + final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + + outBuffer.put(i0, (byte) c0); + outBuffer.put(i1, (byte) c1); + outBuffer.put(i2, (byte) c2); + outBuffer.put(i7, (byte) c7); + + rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); + rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); + rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); + rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); + + rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); + rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); + rans2 = Utils.RANSDecodeRenormalize4x8(rans2, inBuffer); + rans7 = Utils.RANSDecodeRenormalize4x8(rans7, inBuffer); + + l0 = c0; + l1 = c1; + l2 = c2; + l7 = c7; + } + + // Remainder + for (; i7 < out_sz; i7++) { + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + outBuffer.put(i7, (byte) c7); + rans7 = syms[l7][c7].advanceSymbol4x8(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); + l7 = c7; + } return outBuffer; } From d0279aa80687724f5748f218c7506a0340e2119d Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 17 May 2022 16:50:23 -0400 Subject: [PATCH 25/76] Fix normalized Frequency (4096), add normalize Frequency using bit shift methods for RANS Nx16 Order 0 and Order 1, RANS Nx16 Order 0 and Order 1 with format flags = 1 works as expected when N=4 --- .../compression/rans/RANSDecodingSymbol.java | 10 +-- .../compression/rans/RANSEncodingSymbol.java | 20 +++--- .../samtools/cram/compression/rans/Utils.java | 64 +++++++++++++++---- .../rans/rans4x8/RANS4x8Decode.java | 6 +- .../rans/rans4x8/RANS4x8Encode.java | 40 ++++++------ .../rans/ransnx16/RANSNx16Decode.java | 43 +++++++------ .../rans/ransnx16/RANSNx16Encode.java | 62 ++++++++++-------- .../samtools/cram/CRAMCodecCorpusTest.java | 15 ++++- .../cram/compression/rans/RansTest.java | 3 +- 9 files changed, 166 insertions(+), 97 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java index ebb7e9fabd..9920209ee9 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java @@ -42,7 +42,7 @@ public void set(final int start, final int freq) { // "start" and frequency "freq". All frequencies are assumed to sum to // "1 << scale_bits". // No renormalization or output happens. - public int advanceSymbolStep(final int r, final int scaleBits) { + public long advanceSymbolStep(final long r, final int scaleBits) { final int mask = ((1 << scaleBits) - 1); // s, x = D(x) @@ -52,11 +52,11 @@ public int advanceSymbolStep(final int r, final int scaleBits) { // Advances in the bit stream by "popping" a single symbol with range start // "start" and frequency "freq". All frequencies are assumed to sum to // "1 << scale_bits". - public int advanceSymbol4x8(final int rIn, final ByteBuffer byteBuffer, final int scaleBits) { + public long advanceSymbol4x8(final long rIn, final ByteBuffer byteBuffer, final int scaleBits) { final int mask = (1 << scaleBits) - 1; // s, x = D(x) - int r = rIn; + long r = rIn; r = freq * (r >> scaleBits) + (r & mask) - start; // re-normalize @@ -70,11 +70,11 @@ public int advanceSymbol4x8(final int rIn, final ByteBuffer byteBuffer, final in return r; } - public int advanceSymbolNx16(final int rIn, final ByteBuffer byteBuffer, final int scaleBits) { + public long advanceSymbolNx16(final long rIn, final ByteBuffer byteBuffer, final int scaleBits) { final int mask = (1 << scaleBits) - 1; // s, x = D(x) - int r = rIn; + long r = rIn; r = freq * (r >> scaleBits) + (r & mask) - start; // re-normalize diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java index 8c3e21d2b6..ea4117cda6 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java @@ -29,21 +29,22 @@ import java.nio.ByteBuffer; public final class RANSEncodingSymbol { - private int xMax; // (Exclusive) upper bound of pre-normalization interval + private long xMax; // (Exclusive) upper bound of pre-normalization interval private int rcpFreq; // Fixed-point reciprocal frequency private int bias; // Bias private int cmplFreq; // Complement of frequency: (1 << scaleBits) - freq private int rcpShift; // Reciprocal shift public void reset() { - xMax = rcpFreq = bias = cmplFreq = rcpFreq = 0; + xMax = rcpFreq = bias = cmplFreq = rcpShift = 0; } public void set(final int start, final int freq, final int scaleBits) { // Rans4x8: xMax = ((Constants.RANS_BYTE_L_4x8 >> scaleBits) << 8) * freq = (1<< 31-scaleBits) * freq // RansNx16: xMax = ((Constants.RANS_BYTE_L_Nx16 >> scaleBits) << 16) * freq = (1<< 31-scaleBits) * freq - xMax = (1<< (31-scaleBits)) * freq; + // why freq > 4095 in Nx16? + xMax = (1L<< (31-scaleBits)) * freq; cmplFreq = (1 << scaleBits) - freq; if (freq < 2) { rcpFreq = (int) ~0L; @@ -66,11 +67,12 @@ public void set(final int start, final int freq, final int scaleBits) { rcpShift += 32; // Avoid the extra >>32 in RansEncPutSymbol } - public int putSymbol4x8(int r, final ByteBuffer byteBuffer) { + public long putSymbol4x8(long r, final ByteBuffer byteBuffer) { ValidationUtils.validateArg(xMax != 0, "can't encode symbol with freq=0"); // re-normalize - int x = r; + long x = r; + // TODO: x should also be long if there is a case where x could be greater than xMax if (x >= xMax) { byteBuffer.put((byte) (x & 0xFF)); x >>= 8; @@ -88,15 +90,15 @@ public int putSymbol4x8(int r, final ByteBuffer byteBuffer) { // The extra >>32 has already been added to RansEncSymbolInit final long q = ((x * (0xFFFFFFFFL & rcpFreq)) >> rcpShift); - r = (int) (x + bias + q * cmplFreq); + r = x + bias + q * cmplFreq; return r; } - public int putSymbolNx16(int r, final ByteBuffer byteBuffer) { + public long putSymbolNx16(long r, final ByteBuffer byteBuffer) { ValidationUtils.validateArg(xMax != 0, "can't encode symbol with freq=0"); // re-normalize - int x = r; + long x = r; if (x >= xMax) { byteBuffer.put((byte) ((x>>8) & 0xFF)); // extra line - 1 more byte byteBuffer.put((byte) (x & 0xFF)); @@ -116,7 +118,7 @@ public int putSymbolNx16(int r, final ByteBuffer byteBuffer) { // The extra >>32 has already been added to RansEncSymbolInit final long q = ((x * (0xFFFFFFFFL & rcpFreq)) >> rcpShift); - r = (int) (x + bias + q * cmplFreq); + r = (x + bias + q * cmplFreq); return r; } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index 2e76ce9993..4fc09bedfc 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -34,15 +34,15 @@ public static void reverse(final ByteBuffer byteBuffer) { } // Returns the current cumulative frequency (map it to a symbol yourself!) - public static int RANSGetCumulativeFrequency(final int r, final int scaleBits) { - return r & ((1 << scaleBits) - 1); + public static int RANSGetCumulativeFrequency(final long r, final int scaleBits) { + return (int) (r & ((1 << scaleBits) - 1)); // since cumulative frequency will be a maximum of 4096 } // Re-normalize. - public static int RANSDecodeRenormalize4x8(int r, final ByteBuffer byteBuffer) { - // re-normalize + public static long RANSDecodeRenormalize4x8(long r, final ByteBuffer byteBuffer) { //rans4x8 + // TODO: replace if - do - while with while if (r < Constants.RANS_4x8_LOWER_BOUND) { do { r = (r << 8) | (0xFF & byteBuffer.get()); @@ -51,7 +51,7 @@ public static int RANSDecodeRenormalize4x8(int r, final ByteBuffer byteBuffer) { return r; } - public static int RANSDecodeRenormalizeNx16(int r, final ByteBuffer byteBuffer) { + public static long RANSDecodeRenormalizeNx16(long r, final ByteBuffer byteBuffer) { // ransNx16 if (r < (Constants.RANS_Nx16_LOWER_BOUND)) { int i = (0xFF & byteBuffer.get()); @@ -123,6 +123,7 @@ public static void normaliseFrequenciesOrder0(final int[] F, final int bits) { continue; } + // As per spec, total frequencies after normalization should be 4096 (4095 could be considered legacy value) // using tr to normalize symbol frequencies such that their total = renormFreq if ((F[symbol] = (int) ((F[symbol] * tr) >> 31)) == 0) { @@ -142,19 +143,24 @@ public static void normaliseFrequenciesOrder0(final int[] F, final int bits) { } } - public static void normaliseFrequenciesOrder1(final int[][] F, final int shift, final boolean constantShift) { + public static void normaliseFrequenciesOrder1(final int[][] F, final int shift) { + // calculate the minimum bit size required for representing the frequency array for each symbol + // and normalise the frequency array using the calculated bit size for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F[Constants.NUMBER_OF_SYMBOLS][j]==0){ continue; } int bitSize = shift; - if (!constantShift) { - // log2 N = Math.log(N)/Math.log(2) - bitSize = (int) Math.ceil(Math.log(F[Constants.NUMBER_OF_SYMBOLS][j]) / Math.log(2)); - if (bitSize > shift) - bitSize = shift; - } + // log2 N = Math.log(N)/Math.log(2) + bitSize = (int) Math.ceil(Math.log(F[Constants.NUMBER_OF_SYMBOLS][j]) / Math.log(2)); + if (bitSize > shift) + bitSize = shift; + + // TODO: check if handling bitSize = 0 is required + if (bitSize == 0) + bitSize = 1; // bitSize cannot be zero + // special case -> if a symbol occurs only once and at the end of the input, // then the order 0 freq table associated with it should have a frequency of 1 for symbol 0 // i.e, F[sym][0] = 1 @@ -162,4 +168,38 @@ public static void normaliseFrequenciesOrder1(final int[][] F, final int shift, } } + public static void normaliseFrequenciesOrder0Shift(final int[] F, final int bits){ + + // compute total frequency + int totalFrequency = 0; + for (int freq : F) { + totalFrequency += freq; + } + if (totalFrequency == 0 || totalFrequency == (1<> 4; - boolean optionalCompressFlag = ((frequencyTableFirstByte & FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK)!=0); + final int frequencyTableFirstByte = (inBuffer.get() & 0xFF); + final int shift = frequencyTableFirstByte >> 4; + final boolean optionalCompressFlag = ((frequencyTableFirstByte & FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK)!=0); ByteBuffer freqTableSource; if (optionalCompressFlag) { - // if optionalCompressFlag is true, the frequency table was compressed using RANS Nx16, N=4 + // spec: The order-1 frequency table itself may still be quite large, + // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. + + // if optionalCompressFlag is true, the frequency table was compressed using RANS Nx16, N=4 Order 0 final int uncompressedLength = Utils.readUint7(inBuffer); final int compressedLength = Utils.readUint7(inBuffer); byte[] compressedFreqTable = new byte[compressedLength]; @@ -130,11 +133,11 @@ private ByteBuffer uncompressOrder1WayN( // read compressedLength bytes into compressedFreqTable byte array inBuffer.get(compressedFreqTable,0,compressedLength); - // decode the compressedFreqTable to get the uncompressedFreqTable + // decode the compressedFreqTable to get the uncompressedFreqTable using RANS Nx16, N=4 Order 0 uncompress freqTableSource = ByteBuffer.allocate(uncompressedLength); ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); compressedFrequencyTableBuffer.order(ByteOrder.LITTLE_ENDIAN); - uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,ransNx16Params); + uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,new RANSNx16Params(0x00)); // format flags = 0 } else { freqTableSource = inBuffer; @@ -147,7 +150,7 @@ private ByteBuffer uncompressOrder1WayN( // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 // TODO: Fails - unexpected symbol in the third iteration of the for loop. final int out_sz = outBuffer.remaining(); - int rans0, rans1, rans2, rans7; + long rans0, rans1, rans2, rans7; inBuffer.order(ByteOrder.LITTLE_ENDIAN); rans0 = inBuffer.getInt(); rans1 = inBuffer.getInt(); @@ -164,20 +167,20 @@ private ByteBuffer uncompressOrder1WayN( int l2 = 0; int l7 = 0; for (; i0 < isz4; i0++, i1++, i2++, i7++) { - final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; - final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, shift)]; + final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, shift)]; + final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, shift)]; + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, shift)]; outBuffer.put(i0, (byte) c0); outBuffer.put(i1, (byte) c1); outBuffer.put(i2, (byte) c2); outBuffer.put(i7, (byte) c7); - rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); - rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); - rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); - rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); + rans0 = syms[l0][c0].advanceSymbolStep(rans0, shift); + rans1 = syms[l1][c1].advanceSymbolStep(rans1, shift); + rans2 = syms[l2][c2].advanceSymbolStep(rans2, shift); + rans7 = syms[l7][c7].advanceSymbolStep(rans7, shift); rans0 = Utils.RANSDecodeRenormalizeNx16(rans0, inBuffer); rans1 = Utils.RANSDecodeRenormalizeNx16(rans1, inBuffer); @@ -192,9 +195,9 @@ private ByteBuffer uncompressOrder1WayN( // Remainder for (; i7 < out_sz; i7++) { - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, shift)]; outBuffer.put(i7, (byte) c7); - rans7 = syms[l7][c7].advanceSymbolNx16(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); + rans7 = syms[l7][c7].advanceSymbolNx16(rans7, inBuffer, shift); l7 = c7; } return outBuffer; @@ -218,7 +221,7 @@ private void readFrequencyTableOrder0( } } } - Utils.normaliseFrequenciesOrder0(F,12); + Utils.normaliseFrequenciesOrder0Shift(F,12); for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if(A[j]>0){ @@ -263,7 +266,7 @@ private void readFrequencyTableOrder1( } // For each symbol, normalise it's order 0 frequency table - Utils.normaliseFrequenciesOrder0(F[i],shift); + Utils.normaliseFrequenciesOrder0Shift(F[i],shift); int cumulativeFreq=0; // set decoding symbols diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 5c3e2e8406..77fc86c60f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -37,7 +37,11 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { // TODO: check if this still applies for Nx16 or if there is a different limit // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 - return compressOrder0WayN(inBuffer, ransNx16Params, outBuffer); + + // First byte of the compressed output provides the order of RANS. + // So, it has to be changed to 0x00 + outBuffer.put(0,(byte) 0x00); + return compressOrder0WayN(inBuffer, new RANSNx16Params(0x00), outBuffer); // correct the format flags to 0 } switch (ransNx16Params.getOrder()) { @@ -56,8 +60,9 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16P final ByteBuffer cp = outBuffer.slice(); int bitSize = (int) Math.ceil(Math.log(inSize) / Math.log(2)); - if (bitSize > 12) { - bitSize = 12; + // TODO: Can bitSize be 0 and should we handle it? + if (bitSize > Constants.TOTAL_FREQ_SHIFT) { + bitSize = Constants.TOTAL_FREQ_SHIFT; } final int prefix_size = outBuffer.position(); @@ -67,8 +72,12 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16P // Write the Frequency table. Keep track of the size for later final int frequencyTableSize = writeFrequenciesOrder0(cp, F); - // Normalize Frequencies such that sum of Frequencies = 1 << 12 - Utils.normaliseFrequenciesOrder0(F, 12); + // Normalise Frequencies such that sum of Frequencies = 1 << 12 + // Since, Frequencies are already normalised to be a sum of power of 2, + // for further normalisation, calculate the bit shift that is required to scale the frequencies to (1 << bits) + if (bitSize != Constants.TOTAL_FREQ_SHIFT) { + Utils.normaliseFrequenciesOrder0Shift(F, Constants.TOTAL_FREQ_SHIFT); + } // update the RANS Encoding Symbols buildSymsOrder0(F); @@ -81,7 +90,7 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16P final int cdata_size; final int in_size = inBuffer.remaining(); final ByteBuffer ptr = cp.slice(); - final int[] rans = new int[Nway]; + final long[] rans = new long[Nway]; final int[] c = new int[Nway]; // c is the array of symbols int r; for (r=0; r=0; i--){ - ptr.putInt(rans[i]); + ptr.putInt((int) rans[i]); } ptr.position(); ptr.flip(); @@ -134,11 +143,10 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16P //TODO: does not work as expected. Need to fix final ByteBuffer cp = outBuffer.slice(); final int[][] F = buildFrequenciesOrder1(inBuffer, ransNx16Params.getInterleaveSize()); - final int shift = 12; // normalise frequencies with a variable shift calculated // using the minimum bit size that is needed to represent a frequency context array - Utils.normaliseFrequenciesOrder1(F, shift, false); + Utils.normaliseFrequenciesOrder1(F, Constants.TOTAL_FREQ_SHIFT); final int prefix_size = outBuffer.position(); // TODO: How is the buffer size calculated? js: 257*257*3+9 @@ -151,14 +159,16 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16P frequencyTable.rewind(); // compressed frequency table using RANS Nx16 Order 0 - compressedFrequencyTable = compressOrder0WayN(frequencyTable, ransNx16Params, compressedFrequencyTable); + compressedFrequencyTable = compressOrder0WayN(frequencyTable, new RANSNx16Params(0x00), compressedFrequencyTable); frequencyTable.rewind(); int compressedFrequencyTableSize = compressedFrequencyTable.limit(); + // spec: The order-1 frequency table itself may still be quite large, + // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. if (compressedFrequencyTableSize < uncompressedFrequencyTableSize) { // first byte - cp.put((byte) (1 | shift << 4 )); + cp.put((byte) (1 | Constants.TOTAL_FREQ_SHIFT << 4 )); Utils.writeUint7(uncompressedFrequencyTableSize,cp); Utils.writeUint7(compressedFrequencyTableSize,cp); @@ -169,9 +179,8 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16P i++; } } else { - // first byte - cp.put((byte) (0 | shift << 4 )); + cp.put((byte) (0 | Constants.TOTAL_FREQ_SHIFT << 4 )); int i=0; while (i> 2; int i0 = isz4 - 2; @@ -257,10 +265,10 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16P rans0 = syms[0][l0].putSymbolNx16(rans0, ptr); ptr.order(ByteOrder.BIG_ENDIAN); - ptr.putInt(rans3); - ptr.putInt(rans2); - ptr.putInt(rans1); - ptr.putInt(rans0); + ptr.putInt((int) rans3); + ptr.putInt((int) rans2); + ptr.putInt((int) rans1); + ptr.putInt((int) rans0); ptr.flip(); compressedBlobSize = ptr.limit(); Utils.reverse(ptr); @@ -306,9 +314,10 @@ private static int[][] buildFrequenciesOrder1(final ByteBuffer inBuffer, final i } frequency[Constants.NUMBER_OF_SYMBOLS][contextSymbol]++; - // set ‘\0’ as context for the first byte in the N interleaved streams - for (int n = 0; n < Nway; n++){ - frequency[0][inBuffer.get((n*((int)Math.floor(inSize/Nway))))]++; + // set ‘\0’ as context for the first byte in the N interleaved streams. + // the first byte of the first interleaved stream is already accounted for. + for (int n = 1; n < Nway; n++){ + frequency[0][0xFF & inBuffer.get((n*((int)Math.floor(inSize/Nway))))]++; // TODO: use shift operator for division } frequency[Constants.NUMBER_OF_SYMBOLS][0] += Nway-1; return frequency; @@ -374,7 +383,7 @@ private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) break; } } - cp.put((byte) run); + Utils.writeUint7(run,cp); } } } @@ -435,6 +444,7 @@ private void buildSymsOrder0(final int[] F) { } private void buildSymsOrder1(final int[][] F) { + // TODO: Call buildSymsOrder0 from buildSymsOrder1 final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { final int[] F_i_ = F[i]; diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index 3026be066b..98901df7f2 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -90,7 +90,15 @@ public Object[][] getRANS4x16TestData() throws IOException { p, ransNx16Encode, ransNx16Decode , - new RANSNx16Params(0), + new RANSNx16Params(0x00), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); + // RANS Nx16 order 1, bitflags = 0x01 + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0x01), "r4x16" // htscodecs directory where the RANSNx16 compressed files reside }); }); @@ -165,8 +173,9 @@ public void testRANSPreCompressed( final ByteBuffer htsjdkCompressedBytes = ransEncode.compress(inputBytes, params); inputBytes.rewind(); - // Compare the htsjdk compressed bytes with the precompressed file from htscodecs repo - Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); +// // commenting for testing! +// // Compare the htsjdk compressed bytes with the precompressed file from htscodecs repo +// Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); // Use htsjdk to uncompress the precompressed file from htscodecs repo final ByteBuffer htsjdkUncompressedBytes = ransDecode.uncompress(preCompressedInputBytes); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index b66ac99407..d72e11e419 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -82,7 +82,8 @@ public Object[][] getRansNx16Codecs() { final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); // TODO: More formatFlags values i.e, combinations of bit flags will be added later return new Object[][]{ - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0)} ,//RANSNx16 formatFlags(first byte) 0 + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x00)} ,//RANSNx16 formatFlags(first byte) 0 + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x01)} ,//RANSNx16 formatFlags(first byte) 1 }; } From c2cac3500dd39e214ccd4dcb75f87df0ac290e56 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 3 Jun 2022 15:44:52 -0400 Subject: [PATCH 26/76] Add ransNx16 for format flags = 1,4,5 (N=32) and replace division with right shifts. --- .../rans/ransnx16/RANSNx16Decode.java | 202 +++++++++--------- .../rans/ransnx16/RANSNx16Encode.java | 185 ++++++++-------- .../rans/ransnx16/RANSNx16Params.java | 4 +- .../samtools/cram/CRAMCodecCorpusTest.java | 20 +- .../cram/compression/rans/RansTest.java | 2 + 5 files changed, 216 insertions(+), 197 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 0d5b5035dd..e5aa1ab4c7 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -41,6 +41,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { initializeRANSDecoder(); final ByteBuffer outBuffer = ByteBuffer.allocate(n_out); switch (ransNx16Params.getOrder()){ + // TODO: remove n_out? case ZERO: uncompressOrder0WayN(inBuffer, outBuffer, n_out, ransNx16Params); break; @@ -72,39 +73,43 @@ private ByteBuffer uncompressOrder0WayN( // Nway parallel rans states. Nway = 4 or 32 final long[] rans = new long[Nway]; - // c is the array of decoded symbols - final byte[] c = new byte[Nway]; + // symbols is the array of decoded symbols + final int[] symbols = new int[Nway]; int r; for (r=0; r> 2) : (n_out >> 5); + // Number of elements that don't fall into the Nway streams - int remSize = (n_out % Nway); + int remSize = n_out - (interleaveSize * Nway); final int out_end = n_out - remSize; for (int i = 0; i < out_end; i += Nway) { for (r=0; r0){ - byte symbol = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans[rev_idx], Constants.TOTAL_FREQ_SHIFT)]; - syms[0xFF & symbol].advanceSymbolNx16(rans[rev_idx], inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put(symbol); + int remainingSymbol = 0xFF & D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans[reverseIndex], Constants.TOTAL_FREQ_SHIFT)]; + syms[remainingSymbol].advanceSymbolNx16(rans[reverseIndex], inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put((byte) remainingSymbol); remSize --; - rev_idx ++; + reverseIndex ++; } outBuffer.position(0); - return outBuffer; } @@ -114,7 +119,6 @@ private ByteBuffer uncompressOrder1WayN( final int n_out, final RANSNx16Params ransNx16Params) { - // TODO: does not work as expected. Need to fix! // read the first byte and calculate the bit shift final int frequencyTableFirstByte = (inBuffer.get() & 0xFF); final int shift = frequencyTableFirstByte >> 4; @@ -143,97 +147,88 @@ private ByteBuffer uncompressOrder1WayN( freqTableSource = inBuffer; } readFrequencyTableOrder1(freqTableSource, shift); - final ArithmeticDecoder[] D = getD(); final RANSDecodingSymbol[][] syms = getDecodingSymbols(); - - // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 - // TODO: Fails - unexpected symbol in the third iteration of the for loop. - final int out_sz = outBuffer.remaining(); - long rans0, rans1, rans2, rans7; + final int outputSize = outBuffer.remaining(); inBuffer.order(ByteOrder.LITTLE_ENDIAN); - rans0 = inBuffer.getInt(); - rans1 = inBuffer.getInt(); - rans2 = inBuffer.getInt(); - rans7 = inBuffer.getInt(); - - final int isz4 = out_sz >> 2; - int i0 = 0; - int i1 = isz4; - int i2 = 2 * isz4; - int i7 = 3 * isz4; - int l0 = 0; - int l1 = 0; - int l2 = 0; - int l7 = 0; - for (; i0 < isz4; i0++, i1++, i2++, i7++) { - final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, shift)]; - final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, shift)]; - final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, shift)]; - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, shift)]; - - outBuffer.put(i0, (byte) c0); - outBuffer.put(i1, (byte) c1); - outBuffer.put(i2, (byte) c2); - outBuffer.put(i7, (byte) c7); - - rans0 = syms[l0][c0].advanceSymbolStep(rans0, shift); - rans1 = syms[l1][c1].advanceSymbolStep(rans1, shift); - rans2 = syms[l2][c2].advanceSymbolStep(rans2, shift); - rans7 = syms[l7][c7].advanceSymbolStep(rans7, shift); - - rans0 = Utils.RANSDecodeRenormalizeNx16(rans0, inBuffer); - rans1 = Utils.RANSDecodeRenormalizeNx16(rans1, inBuffer); - rans2 = Utils.RANSDecodeRenormalizeNx16(rans2, inBuffer); - rans7 = Utils.RANSDecodeRenormalizeNx16(rans7, inBuffer); - - l0 = c0; - l1 = c1; - l2 = c2; - l7 = c7; + + // Nway parallel rans states. Nway = 4 or 32 + final int Nway = ransNx16Params.getInterleaveSize(); + final long[] rans = new long[Nway]; + final int[] interleaveStreamIndex = new int[Nway]; + final int[] context = new int[Nway]; + final int[] symbol = new int[Nway]; + + // size of interleaved stream = outputSize / Nway + // For Nway = 4, division by 4 is the same as right shift by 2 bits + // For Nway = 32, division by 32 is the same as right shift by 5 bits + final int interleaveSize = (Nway==4) ? (outputSize >> 2): (outputSize >> 5); + + int r; + for (r=0; r 0) { - if ((F[j] = (cp.get() & 0xFF)) >= 0x80){ - F[j] &= ~0x80; - F[j] = (( F[j] &0x7f) << 7) | (cp.get() & 0x7F); + if (alphabet[j] > 0) { + if ((frequencies[j] = (cp.get() & 0xFF)) >= 0x80){ + frequencies[j] &= ~0x80; + frequencies[j] = (( frequencies[j] &0x7f) << 7) | (cp.get() & 0x7F); } } } - Utils.normaliseFrequenciesOrder0Shift(F,12); + Utils.normaliseFrequenciesOrder0Shift(frequencies,12); for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if(A[j]>0){ + if(alphabet[j]>0){ - // decoder.fc[j].F -> Frequency - // decoder.fc[j].C -> Cumulative Frequency preceding the current symbol - decoder.freq[j] = F[j]; - decoder.cumulativeFreq[j] = cumulativeFreq; + // set RANSDecodingSymbol + decoder.freq[j] = frequencies[j]; + decoder.cumulativeFreq[j] = cumulativeFrequency; decodingSymbols[j].set(decoder.cumulativeFreq[j], decoder.freq[j]); - // R -> Reverse Lookup table - Arrays.fill(decoder.reverseLookup, cumulativeFreq, cumulativeFreq + decoder.freq[j], (byte) j); - cumulativeFreq += decoder.freq[j]; + // update Reverse Lookup table + Arrays.fill(decoder.reverseLookup, cumulativeFrequency, cumulativeFrequency + decoder.freq[j], (byte) j); + cumulativeFrequency += decoder.freq[j]; } } } @@ -241,24 +236,21 @@ private void readFrequencyTableOrder0( private void readFrequencyTableOrder1( final ByteBuffer cp, int shift) { - final int[][] F = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; - - // TODO: commented out to suppress spotBugs warning - //final int[][] C = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; + final int[][] frequencies = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; final ArithmeticDecoder[] D = getD(); final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols(); - final int[] A = readAlphabet(cp); + final int[] alphabet = readAlphabet(cp); for (int i=0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - if (A[i] > 0) { + if (alphabet[i] > 0) { int run = 0; for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (A[j] > 0) { + if (alphabet[j] > 0) { if (run > 0) { run--; } else { - F[i][j] = Utils.readUint7(cp); - if (F[i][j] == 0){ + frequencies[i][j] = Utils.readUint7(cp); + if (frequencies[i][j] == 0){ run = Utils.readUint7(cp); } } @@ -266,12 +258,12 @@ private void readFrequencyTableOrder1( } // For each symbol, normalise it's order 0 frequency table - Utils.normaliseFrequenciesOrder0Shift(F[i],shift); + Utils.normaliseFrequenciesOrder0Shift(frequencies[i],shift); int cumulativeFreq=0; // set decoding symbols for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - D[i].freq[j]=F[i][j]; + D[i].freq[j]=frequencies[i][j]; D[i].cumulativeFreq[j]=cumulativeFreq; decodingSymbols[i][j].set( D[i].cumulativeFreq[j], @@ -279,7 +271,7 @@ private void readFrequencyTableOrder1( ); /* Build reverse lookup table */ Arrays.fill(D[i].reverseLookup, cumulativeFreq, cumulativeFreq + D[i].freq[j], (byte) j); - cumulativeFreq+=F[i][j]; + cumulativeFreq+=frequencies[i][j]; } } } @@ -287,26 +279,26 @@ private void readFrequencyTableOrder1( private static int[] readAlphabet(final ByteBuffer cp){ // gets the list of alphabets whose frequency!=0 - final int[] A = new int[Constants.NUMBER_OF_SYMBOLS]; + final int[] alphabet = new int[Constants.NUMBER_OF_SYMBOLS]; for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - A[i]=0; + alphabet[i]=0; } int rle = 0; - int sym = cp.get() & 0xFF; - int last_sym = sym; + int symbol = cp.get() & 0xFF; + int lastSymbol = symbol; do { - A[sym] = 1; + alphabet[symbol] = 1; if (rle!=0) { rle--; - sym++; + symbol++; } else { - sym = cp.get() & 0xFF; - if (sym == last_sym+1) + symbol = cp.get() & 0xFF; + if (symbol == lastSymbol+1) rle = cp.get() & 0xFF; } - last_sym = sym; - } while (sym != 0); - return A; + lastSymbol = symbol; + } while (symbol != 0); + return alphabet; } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 77fc86c60f..8e9ebebe23 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -41,7 +41,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // First byte of the compressed output provides the order of RANS. // So, it has to be changed to 0x00 outBuffer.put(0,(byte) 0x00); - return compressOrder0WayN(inBuffer, new RANSNx16Params(0x00), outBuffer); // correct the format flags to 0 + return compressOrder0WayN(inBuffer, new RANSNx16Params(0x00), outBuffer); } switch (ransNx16Params.getOrder()) { @@ -54,7 +54,10 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN } } - private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { + private ByteBuffer compressOrder0WayN ( + final ByteBuffer inBuffer, + final RANSNx16Params ransNx16Params, + final ByteBuffer outBuffer) { final int inSize = inBuffer.remaining(); final int[] F = buildFrequenciesOrder0(inBuffer); final ByteBuffer cp = outBuffer.slice(); @@ -84,14 +87,14 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16P inBuffer.rewind(); //TODO: tmp staging glue - final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; + final RANSEncodingSymbol[] ransEncodingSymbols = getEncodingSymbols()[0]; final int Nway = ransNx16Params.getInterleaveSize(); - final int cdata_size; - final int in_size = inBuffer.remaining(); + final int compressedDataSize; + final int inputSize = inBuffer.remaining(); final ByteBuffer ptr = cp.slice(); final long[] rans = new long[Nway]; - final int[] c = new int[Nway]; // c is the array of symbols + final int[] symbol = new int[Nway]; int r; for (r=0; r> 2) : (inputSize >> 5); + int remainingSize = inputSize - (interleaveSize * Nway); + int reverseIndex = 1; // encoded in LIFO order - while (remSize>0){ + while (remainingSize>0){ // encode remaining elements first - int symbol_ =0xFF & inBuffer.get(in_size - rev_idx); - rans[remSize - 1] = syms[symbol_].putSymbolNx16(rans[remSize - 1], ptr); - remSize --; - rev_idx ++; + int remainingSymbol =0xFF & inBuffer.get(inputSize - reverseIndex); + rans[remainingSize - 1] = ransEncodingSymbols[remainingSymbol].putSymbolNx16(rans[remainingSize - 1], ptr); + remainingSize --; + reverseIndex ++; } int i; - for (i = (in_size - (in_size%Nway)); i > 0; i -= Nway) { + for (i = (interleaveSize * Nway); i > 0; i -= Nway) { for (r = Nway - 1; r >= 0; r--){ // encode using Nway parallel rans states. Nway = 4 or 32 - c[r] = 0xFF & inBuffer.get(i - (Nway - r)); - rans[r] = syms[c[r]].putSymbolNx16(rans[r], ptr); + symbol[r] = 0xFF & inBuffer.get(i - (Nway - r)); + rans[r] = ransEncodingSymbols[symbol[r]].putSymbolNx16(rans[r], ptr); } } for (i=Nway-1; i>=0; i--){ @@ -127,26 +133,27 @@ private ByteBuffer compressOrder0WayN(final ByteBuffer inBuffer, final RANSNx16P } ptr.position(); ptr.flip(); - cdata_size = ptr.limit(); + compressedDataSize = ptr.limit(); // since the data is encoded in reverse order, // reverse the compressed bytes, so that it is in correct order when uncompressed. Utils.reverse(ptr); inBuffer.position(inBuffer.limit()); - outBuffer.rewind(); // set position to 0 - outBuffer.limit(prefix_size + frequencyTableSize + cdata_size); + outBuffer.limit(prefix_size + frequencyTableSize + compressedDataSize); return outBuffer; } - private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { - //TODO: does not work as expected. Need to fix + private ByteBuffer compressOrder1WayN ( + final ByteBuffer inBuffer, + final RANSNx16Params ransNx16Params, + final ByteBuffer outBuffer) { final ByteBuffer cp = outBuffer.slice(); - final int[][] F = buildFrequenciesOrder1(inBuffer, ransNx16Params.getInterleaveSize()); + final int[][] frequencies = buildFrequenciesOrder1(inBuffer, ransNx16Params.getInterleaveSize()); // normalise frequencies with a variable shift calculated // using the minimum bit size that is needed to represent a frequency context array - Utils.normaliseFrequenciesOrder1(F, Constants.TOTAL_FREQ_SHIFT); + Utils.normaliseFrequenciesOrder1(frequencies, Constants.TOTAL_FREQ_SHIFT); final int prefix_size = outBuffer.position(); // TODO: How is the buffer size calculated? js: 257*257*3+9 @@ -154,7 +161,7 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16P ByteBuffer compressedFrequencyTable = allocateOutputBuffer(1); // uncompressed frequency table - final int uncompressedFrequencyTableSize = writeFrequenciesOrder1(frequencyTable,F); + final int uncompressedFrequencyTableSize = writeFrequenciesOrder1(frequencyTable,frequencies); frequencyTable.limit(uncompressedFrequencyTableSize); frequencyTable.rewind(); @@ -190,87 +197,86 @@ private ByteBuffer compressOrder1WayN(final ByteBuffer inBuffer, final RANSNx16P int frequencyTableSize = cp.position(); // normalise frequencies with a constant shift - Utils.normaliseFrequenciesOrder1Shift(F, Constants.TOTAL_FREQ_SHIFT); + Utils.normaliseFrequenciesOrder1Shift(frequencies, Constants.TOTAL_FREQ_SHIFT); - // set encoding symbols - buildSymsOrder1(F); // TODO: move into utils + // set encoding symbol + buildSymsOrder1(frequencies); // TODO: move into utils // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 - // TODO: debug. //TODO: tmp staging - final RANSEncodingSymbol[][] syms = getEncodingSymbols(); + final RANSEncodingSymbol[][] ransEncodingSymbols = getEncodingSymbols(); + final int Nway = ransNx16Params.getInterleaveSize(); + final int inputSize = inBuffer.remaining(); + final long[] rans = new long[Nway]; + int r; + for (r=0; r> 2; - int i0 = isz4 - 2; - int i1 = 2 * isz4 - 2; - int i2 = 3 * isz4 - 2; - int i3 = 4 * isz4 - 2; + // size of each interleaved array = total size / Nway; + // For Nway = 4, division by 4 is the same as right shift by 2 bits + // For Nway = 32, division by 32 is the same as right shift by 5 bits + final int interleaveSize = (Nway == 4) ? inputSize >> 2: inputSize >> 5; + final int[] interleaveStreamIndex = new int[Nway]; + final int[] symbol = new int[Nway]; + final int[] context = new int[Nway]; + for (r=0; r= 0) { - l0 = 0xFF & inBuffer.get(i0 + 1); - } - int l1 = 0; - if (i1 + 1 >= 0) { - l1 = 0xFF & inBuffer.get(i1 + 1); - } - int l2 = 0; - if (i2 + 1 >= 0) { - l2 = 0xFF & inBuffer.get(i2 + 1); + // initialize interleaveStreamIndex + // interleaveStreamIndex = (index of last element in the interleaved stream - 1) = (interleaveSize - 1) - 1 + interleaveStreamIndex[r] = (r+1)*interleaveSize - 2; + + //intialize symbol + symbol[r]=0; + if((interleaveStreamIndex[r]+1 >= 0) & (r!= Nway-1)){ + symbol[r] = 0xFF & inBuffer.get(interleaveStreamIndex[r] + 1); + } + if ( r == Nway-1 ){ + symbol[r] = 0xFF & inBuffer.get(inputSize - 1); + } } - int l3; - - // Deal with the remainder - l3 = 0xFF & inBuffer.get(in_size - 1); - for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { - final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); - l3 = c3; + + // deal with the reminder + for ( + interleaveStreamIndex[Nway - 1] = inputSize - 2; + interleaveStreamIndex[Nway - 1] > Nway * interleaveSize - 2 && interleaveStreamIndex[Nway - 1] >= 0; + interleaveStreamIndex[Nway - 1]-- ) { + context[Nway - 1] = 0xFF & inBuffer.get(interleaveStreamIndex[Nway - 1]); + rans[Nway - 1] = ransEncodingSymbols[context[Nway - 1]][symbol[Nway - 1]].putSymbolNx16(rans[Nway - 1], ptr); + symbol[Nway - 1] = context[Nway - 1]; } - for (; i0 >= 0; i0--, i1--, i2--, i3--) { - final int c0 = 0xFF & inBuffer.get(i0); - final int c1 = 0xFF & inBuffer.get(i1); - final int c2 = 0xFF & inBuffer.get(i2); - final int c3 = 0xFF & inBuffer.get(i3); - - rans3 = syms[c3][l3].putSymbolNx16(rans3, ptr); - rans2 = syms[c2][l2].putSymbolNx16(rans2, ptr); - rans1 = syms[c1][l1].putSymbolNx16(rans1, ptr); - rans0 = syms[c0][l0].putSymbolNx16(rans0, ptr); - - l0 = c0; - l1 = c1; - l2 = c2; - l3 = c3; + while (interleaveStreamIndex[0] >= 0) { + for (r=0; r=0; r-- ){ + ptr.putInt((int) rans[r]); + } + ptr.flip(); - compressedBlobSize = ptr.limit(); + final int compressedBlobSize = ptr.limit(); Utils.reverse(ptr); /* * Depletion of the in buffer cannot be confirmed because of the get(int @@ -296,7 +302,7 @@ private static int[] buildFrequenciesOrder0(final ByteBuffer inBuffer) { private static int[][] buildFrequenciesOrder1(final ByteBuffer inBuffer, final int Nway) { // Returns an array of raw symbol frequencies - final int inSize = inBuffer.remaining(); + final int inputSize = inBuffer.remaining(); // context is stored in frequency[Constants.NUMBER_OF_SYMBOLS] array final int[][] frequency = new int[Constants.NUMBER_OF_SYMBOLS+1][Constants.NUMBER_OF_SYMBOLS]; @@ -304,7 +310,7 @@ private static int[][] buildFrequenciesOrder1(final ByteBuffer inBuffer, final i // ‘\0’ is the initial context int contextSymbol = 0; int srcSymbol; - for (int i = 0; i < inSize; i++) { + for (int i = 0; i < inputSize; i++) { // update the context array frequency[Constants.NUMBER_OF_SYMBOLS][contextSymbol]++; @@ -317,7 +323,10 @@ private static int[][] buildFrequenciesOrder1(final ByteBuffer inBuffer, final i // set ‘\0’ as context for the first byte in the N interleaved streams. // the first byte of the first interleaved stream is already accounted for. for (int n = 1; n < Nway; n++){ - frequency[0][0xFF & inBuffer.get((n*((int)Math.floor(inSize/Nway))))]++; // TODO: use shift operator for division + // For Nway = 4, division by 4 is the same as right shift by 2 bits + // For Nway = 32, division by 32 is the same as right shift by 5 bits + int symbol = Nway == 4 ? (0xFF & inBuffer.get((n*(inputSize >> 2)))) : (0xFF & inBuffer.get((n*(inputSize >> 5)))); + frequency[0][symbol]++; } frequency[Constants.NUMBER_OF_SYMBOLS][0] += Nway-1; return frequency; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index 6d5e99745d..c8a2126e36 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -43,9 +43,7 @@ public int getFormatFlags(){ public int getInterleaveSize(){ // Interleave N = 32 rANS states (else N = 4) - //TODO: temporarily always return 4 - //return ((formatFlags & N32_FLAG_MASK) == 0) ? 32 : 4; - return 4; + return ((formatFlags & N32_FLAG_MASK) == 0) ? 4 : 32; } public boolean getStripe(){ diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index 98901df7f2..98ee5491c6 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -101,6 +101,24 @@ public Object[][] getRANS4x16TestData() throws IOException { new RANSNx16Params(0x01), "r4x16" // htscodecs directory where the RANSNx16 compressed files reside }); + + // RANS Nx16 order 1, bitflags = 0x04 + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0x04), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); + + // RANS Nx16 order 1, bitflags = 0x05 + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0x05), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); }); return testCases.toArray(new Object[][]{}); } @@ -173,7 +191,7 @@ public void testRANSPreCompressed( final ByteBuffer htsjdkCompressedBytes = ransEncode.compress(inputBytes, params); inputBytes.rewind(); -// // commenting for testing! +// // commenting as the comparison of compressed bytes is not needed to ensure interoperability. // // Compare the htsjdk compressed bytes with the precompressed file from htscodecs repo // Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index d72e11e419..92f88dcd09 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -84,6 +84,8 @@ public Object[][] getRansNx16Codecs() { return new Object[][]{ {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x00)} ,//RANSNx16 formatFlags(first byte) 0 {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x01)} ,//RANSNx16 formatFlags(first byte) 1 + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x04)} ,//RANSNx16 formatFlags(first byte) 4 + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x05)} ,//RANSNx16 formatFlags(first byte) 5 }; } From 50349151bf00055a3af7915b2d43e209bfcacdb1 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 6 Jun 2022 15:24:42 -0400 Subject: [PATCH 27/76] When CAT is true, add limit and rewind the outBuffer before returning it. --- .../samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 8e9ebebe23..f5c144fae6 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -30,6 +30,8 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN if (ransNx16Params.getCAT()) { // Data is uncompressed outBuffer.put(inBuffer); + outBuffer.limit(outBuffer.position()); + outBuffer.rewind(); // set position to 0 return outBuffer; } From c966eecddee8611218fd734a73a29c046209300e Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 6 Jun 2022 15:25:54 -0400 Subject: [PATCH 28/76] Add RANSTest with formatflags = 32, 33, 36, 37 --- .../cram/compression/rans/RansTest.java | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 92f88dcd09..dd31644540 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -82,10 +82,32 @@ public Object[][] getRansNx16Codecs() { final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); // TODO: More formatFlags values i.e, combinations of bit flags will be added later return new Object[][]{ - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x00)} ,//RANSNx16 formatFlags(first byte) 0 - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x01)} ,//RANSNx16 formatFlags(first byte) 1 - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x04)} ,//RANSNx16 formatFlags(first byte) 4 - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x05)} ,//RANSNx16 formatFlags(first byte) 5 + + //RANSNx16 formatFlags(first byte) 0: Order 0, N = 4, CAT false + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x00)} , + + //RANSNx16 formatFlags(first byte) 1: Order 1, N = 4, CAT false + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x01)} , + + //RANSNx16 formatFlags(first byte) 4: Order 0, N = 32, CAT false + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x04)} , + + //RANSNx16 formatFlags(first byte) 5: Order 1, N = 32, CAT false + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x05)} , + + //RANSNx16 formatFlags(first byte) 32: Order 0, N = 4, CAT true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x20)} , + + //RANSNx16 formatFlags(first byte) 33: Order 1, N = 4, CAT true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x21)} , + + //RANSNx16 formatFlags(first byte) 36: Order 0, N = 32, CAT true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x24)} , + + //RANSNx16 formatFlags(first byte) 37: Order 1, N = 32, CAT true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x25)} , + + }; } From 890940e30ce2d2966bf08538447a85335ce41c53 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 6 Jun 2022 16:43:09 -0400 Subject: [PATCH 29/76] Remove initialization of alphabet array. --- .../cram/compression/rans/ransnx16/RANSNx16Decode.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index e5aa1ab4c7..23d8344d31 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -280,9 +280,6 @@ private void readFrequencyTableOrder1( private static int[] readAlphabet(final ByteBuffer cp){ // gets the list of alphabets whose frequency!=0 final int[] alphabet = new int[Constants.NUMBER_OF_SYMBOLS]; - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - alphabet[i]=0; - } int rle = 0; int symbol = cp.get() & 0xFF; int lastSymbol = symbol; From c3dd46da0434a2c1be6259ba896b35e03feb911f Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 25 Jul 2022 14:33:31 -0400 Subject: [PATCH 30/76] Add RLE Encode and Decode. Works as expected for RANSNx16 Order 0 --- .../rans/ransnx16/RANSNx16Decode.java | 70 +++++++++- .../rans/ransnx16/RANSNx16Encode.java | 127 +++++++++++++++++- .../samtools/cram/CRAMCodecCorpusTest.java | 10 ++ .../cram/compression/rans/RansTest.java | 7 + 4 files changed, 206 insertions(+), 8 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 23d8344d31..da5aadd6f3 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -4,6 +4,7 @@ import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; +import htsjdk.samtools.cram.compression.rans.RANSParams; import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; @@ -23,7 +24,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { inBuffer.order(ByteOrder.LITTLE_ENDIAN); // the first byte of compressed stream gives the formatFlags - final int formatFlags = inBuffer.get(); + final int formatFlags = inBuffer.get() & 0xFF; final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); // TODO: add methods to handle various flags @@ -31,6 +32,18 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // if nosz is set, then uncompressed size is not recorded. int n_out = ransNx16Params.getNosz() ? 0 : Utils.readUint7(inBuffer); + // if rle, get rle metadata, which will be used later to decode rle + final int uncompressedRLEMetaDataLength; + int uncompressedRLEOutputLength = 0; + final int[] rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; + ByteBuffer uncompressedRLEMetaData = null; + if (ransNx16Params.getRLE()){ + uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); + uncompressedRLEOutputLength = n_out; + n_out = Utils.readUint7(inBuffer); + uncompressedRLEMetaData = decodeRLEMeta(inBuffer,ransNx16Params,uncompressedRLEMetaDataLength,rleSymbols); + } + // If CAT is set then, the input is uncompressed if (ransNx16Params.getCAT()){ byte[] data = new byte[n_out]; @@ -38,7 +51,6 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { return ByteBuffer.wrap(data); } else { - initializeRANSDecoder(); final ByteBuffer outBuffer = ByteBuffer.allocate(n_out); switch (ransNx16Params.getOrder()){ // TODO: remove n_out? @@ -51,6 +63,11 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { default: throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); } + + // if rle, then decodeRLE + if (ransNx16Params.getRLE() & uncompressedRLEMetaData!=null ){ + return decodeRLE(outBuffer,rleSymbols,uncompressedRLEMetaData, uncompressedRLEOutputLength); + } return outBuffer; } } @@ -60,6 +77,7 @@ private ByteBuffer uncompressOrder0WayN( final ByteBuffer outBuffer, final int n_out, final RANSNx16Params ransNx16Params) { + initializeRANSDecoder(); // read the frequency table, get the normalised frequencies and use it to set the RANSDecodingSymbols readFrequencyTableOrder0(inBuffer); @@ -118,6 +136,7 @@ private ByteBuffer uncompressOrder1WayN( final ByteBuffer outBuffer, final int n_out, final RANSNx16Params ransNx16Params) { + initializeRANSDecoder(); // read the first byte and calculate the bit shift final int frequencyTableFirstByte = (inBuffer.get() & 0xFF); @@ -298,4 +317,51 @@ private static int[] readAlphabet(final ByteBuffer cp){ return alphabet; } + private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ransParams, final int uncompressedRLEMetaDataLength, final int[] rleSymbols) { + ByteBuffer uncompressedRLEMetaData; + final int compressedRLEMetaDataLength; + if ((uncompressedRLEMetaDataLength & 0x01)!=0) { + byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; + inBuffer.get(uncompressedRLEMetaDataArray, 0, (uncompressedRLEMetaDataLength-1)/2); + uncompressedRLEMetaData = ByteBuffer.wrap(uncompressedRLEMetaDataArray); + } else { + compressedRLEMetaDataLength = Utils.readUint7(inBuffer); + ByteBuffer compressedRLEMetaData = ByteBuffer.allocate(compressedRLEMetaDataLength); + byte[] compressedRLEMetaDataArray = new byte[compressedRLEMetaDataLength]; + inBuffer.get(compressedRLEMetaDataArray,0,compressedRLEMetaDataLength); + compressedRLEMetaData = ByteBuffer.wrap(compressedRLEMetaDataArray); + compressedRLEMetaData.order(ByteOrder.LITTLE_ENDIAN); + uncompressedRLEMetaData = ByteBuffer.allocate(uncompressedRLEMetaDataLength / 2); + + // TODO: get Nway from ransParams and use N to uncompress + uncompressOrder0WayN(compressedRLEMetaData,uncompressedRLEMetaData, uncompressedRLEMetaDataLength / 2, new RANSNx16Params(0x00)); // N should come from the prev step + } + + int numRLESymbols = uncompressedRLEMetaData.get() & 0xFF; + if (numRLESymbols == 0) { + numRLESymbols = 256; + } + for (int i = 0; i< numRLESymbols; i++) { + rleSymbols[uncompressedRLEMetaData.get() & 0xFF] = 1; + } + return uncompressedRLEMetaData; + } + + private ByteBuffer decodeRLE(final ByteBuffer inBuffer , final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, int uncompressedRLEOutputLength) { + ByteBuffer outBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength); + int j = 0; + for(int i = 0; j< uncompressedRLEOutputLength; i++){ + int sym = inBuffer.get(i) & 0xFF; + if (rleSymbols[sym]!=0){ + int run = Utils.readUint7(uncompressedRLEMetaData); + for (int r=0; r<= run; r++){ + outBuffer.put(j++, (byte) sym); + } + }else { + outBuffer.put(j++, (byte) sym); + } + } + return outBuffer; + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index f5c144fae6..4be39b7763 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -3,6 +3,7 @@ import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; +import htsjdk.samtools.cram.compression.rans.RANSParams; import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; @@ -22,35 +23,49 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // TODO: add methods to handle various flags + // NoSize if (!ransNx16Params.getNosz()) { // original size is not recorded int insize = inBuffer.remaining(); Utils.writeUint7(insize,outBuffer); } + + // TODO: Add Stripe + + // TODO: Add Pack + + // using inputBuffer as inBuffer is declared final + // TODO: should inBuffer not be declared final? + ByteBuffer inputBuffer = inBuffer; + // RLE + if (ransNx16Params.getRLE()){ + inputBuffer = encodeRLE(inBuffer, ransNx16Params, outBuffer); + } + + if (ransNx16Params.getCAT()) { // Data is uncompressed - outBuffer.put(inBuffer); + outBuffer.put(inputBuffer); outBuffer.limit(outBuffer.position()); outBuffer.rewind(); // set position to 0 return outBuffer; } - initializeRANSEncoder(); - if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { + if (inputBuffer.remaining() < MINIMUM__ORDER_1_SIZE && ransNx16Params.getOrder() == RANSParams.ORDER.ONE) { // TODO: check if this still applies for Nx16 or if there is a different limit // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 // First byte of the compressed output provides the order of RANS. // So, it has to be changed to 0x00 outBuffer.put(0,(byte) 0x00); - return compressOrder0WayN(inBuffer, new RANSNx16Params(0x00), outBuffer); + return compressOrder0WayN(inputBuffer, new RANSNx16Params(0x00), outBuffer); } switch (ransNx16Params.getOrder()) { case ZERO: - return compressOrder0WayN(inBuffer, ransNx16Params, outBuffer); + return compressOrder0WayN(inputBuffer, ransNx16Params, outBuffer); case ONE: - return compressOrder1WayN(inBuffer, ransNx16Params, outBuffer); + return compressOrder1WayN(inputBuffer, ransNx16Params, outBuffer); default: throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); } @@ -60,6 +75,7 @@ private ByteBuffer compressOrder0WayN ( final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { + initializeRANSEncoder(); final int inSize = inBuffer.remaining(); final int[] F = buildFrequenciesOrder0(inBuffer); final ByteBuffer cp = outBuffer.slice(); @@ -150,6 +166,7 @@ private ByteBuffer compressOrder1WayN ( final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { + initializeRANSEncoder(); final ByteBuffer cp = outBuffer.slice(); final int[][] frequencies = buildFrequenciesOrder1(inBuffer, ransNx16Params.getInterleaveSize()); @@ -469,4 +486,102 @@ private void buildSymsOrder1(final int[][] F) { } } + private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransParams, final ByteBuffer outBuffer){ + + // Find the symbols that benefit from RLE, i.e, the symbols that occur more than 2 times in succession. + // spec: For symbols that occur many times in succession, we can replace them with a single symbol and a count. + final int[] rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; + int inputSize = inBuffer.remaining(); + + int lastSymbol = -1; + for (int i = 0; i < inputSize; i++) { + int currentSymbol = inBuffer.get(i)&0xFF; + rleSymbols[currentSymbol] += (currentSymbol==lastSymbol ? 1:-1); + lastSymbol = currentSymbol; + } + + // numRLESymbols is the number of symbols that are run length encoded + int numRLESymbols = 0; + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (rleSymbols[i]>0) { + numRLESymbols++; + } + } + + if (numRLESymbols==0) { + // Format cannot cope with zero RLE symbols, so pick one! + numRLESymbols = 1; + rleSymbols[0] = 1; + } + + // create rleMetaData buffer to store rle metadata. + // This buffer will be compressed using compressOrder0WayN towards the end of this method + // TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize + ByteBuffer rleMetaData = ByteBuffer.allocate(numRLESymbols+1+inputSize); // rleMetaData + + // write number of symbols that are run length encoded to the outBuffer + rleMetaData.put((byte) numRLESymbols); + + for (int i=0; i<256; i++){ + if (rleSymbols[i] >0){ + // write the symbols that are run length encoded + rleMetaData.put((byte) i); + } + + } + + // Apply RLE + // encodedData -> input src data without repetition + ByteBuffer encodedData = ByteBuffer.allocate(inputSize); // rleInBuffer + int encodedDataIdx = 0; // rleInBufferIndex + + for (int i = 0; i < inputSize; i++) { + encodedData.put(encodedDataIdx++,inBuffer.get(i)); + if (rleSymbols[inBuffer.get(i)&0xFF]>0) { + lastSymbol = inBuffer.get(i) & 0xFF; + int run = 0; + + // calculate the run value for current symbol + while (i+run+1 < inputSize && (inBuffer.get(i+run+1)& 0xFF)==lastSymbol) { + run++; + } + + // write the run value to metadata + Utils.writeUint7(run, rleMetaData); + + // go to the next element that is not equal to it's previous element + i += run; + } + } + + encodedData.limit(encodedDataIdx); + // limit and rewind + // TODO: check if position of rleMetadata is at the end of the buffer as expected + rleMetaData.limit(rleMetaData.position()); + rleMetaData.rewind(); + + // compress the rleMetaData Buffer + ByteBuffer compressedRleMetaData = allocateOutputBuffer(rleMetaData.remaining()); + + // TODO: Nway? Check other places as well -> How to setInterleaveSize? - can i do it by changing formatflags? + // // Compress lengths with O0 and literals with O0/O1 ("order" param) + // TODO: get Nway from ransParams and use N to uncompress + + compressOrder0WayN(rleMetaData, new RANSNx16Params(0x00),compressedRleMetaData); + + // write to compressedRleMetaData to outBuffer + Utils.writeUint7(rleMetaData.limit()*2, outBuffer); + Utils.writeUint7(encodedDataIdx, outBuffer); + Utils.writeUint7(compressedRleMetaData.limit(),outBuffer); + + outBuffer.put(compressedRleMetaData); + + /* + * Depletion of the inBuffer cannot be confirmed because of the get(int + * position) method use during encoding, hence enforcing: + */ + inBuffer.position(inBuffer.limit()); + return encodedData; + } + } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index 98ee5491c6..cde7d42e10 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -93,6 +93,16 @@ public Object[][] getRANS4x16TestData() throws IOException { new RANSNx16Params(0x00), "r4x16" // htscodecs directory where the RANSNx16 compressed files reside }); + + // RANS Nx16 order 0, rle flag is set + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0x40), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); + // RANS Nx16 order 1, bitflags = 0x01 testCases.add(new Object[] { p, diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index dd31644540..ca750cba00 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -106,7 +106,12 @@ public Object[][] getRansNx16Codecs() { //RANSNx16 formatFlags(first byte) 37: Order 1, N = 32, CAT true {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x25)} , +// + //RANSNx16 formatFlags(first byte) 64: Order 0, N = 4, CAT false, RLE = true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x40)} , +// //RANSNx16 formatFlags(first byte) 65: Order 1, N = 4, CAT false, RLE = true +// {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x41)} , }; } @@ -225,6 +230,8 @@ private static void ransRoundTrip( final ByteBuffer compressed = ransEncode.compress(data, params); final ByteBuffer uncompressed = ransDecode.uncompress(compressed); data.rewind(); +// Assert.assertEquals(data, uncompressed); + while (data.hasRemaining()) { if (!uncompressed.hasRemaining()) { Assert.fail("Premature end of uncompressed data."); From 50204770f80bce005aa3e1ef47e43a371bce8ce0 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 25 Jul 2022 14:48:37 -0400 Subject: [PATCH 31/76] Move declaration of variables used within the for loop to inside the for loop --- .../rans/ransnx16/RANSNx16Decode.java | 12 ++++------ .../rans/ransnx16/RANSNx16Encode.java | 24 ++++++++----------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index da5aadd6f3..f5b904ade5 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -93,8 +93,7 @@ private ByteBuffer uncompressOrder0WayN( // symbols is the array of decoded symbols final int[] symbols = new int[Nway]; - int r; - for (r=0; r> 2): (outputSize >> 5); - int r; - for (r=0; r 0; i -= Nway) { - for (r = Nway - 1; r >= 0; r--){ + for (int i = (interleaveSize * Nway); i > 0; i -= Nway) { + for (int r = Nway - 1; r >= 0; r--){ // encode using Nway parallel rans states. Nway = 4 or 32 symbol[r] = 0xFF & inBuffer.get(i - (Nway - r)); rans[r] = ransEncodingSymbols[symbol[r]].putSymbolNx16(rans[r], ptr); } } - for (i=Nway-1; i>=0; i--){ + for (int i=Nway-1; i>=0; i--){ ptr.putInt((int) rans[i]); } ptr.position(); @@ -228,8 +225,7 @@ private ByteBuffer compressOrder1WayN ( final int Nway = ransNx16Params.getInterleaveSize(); final int inputSize = inBuffer.remaining(); final long[] rans = new long[Nway]; - int r; - for (r=0; r= 0) { - for (r=0; r=0; r-- ){ + for (int r=Nway-1; r>=0; r-- ){ ptr.putInt((int) rans[r]); } From c22cd8b84ff1bc5e46abdce43b9d9bfde4777dc2 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 25 Jul 2022 16:02:31 -0400 Subject: [PATCH 32/76] Convert symbols from int to byte --- .../rans/rans4x8/RANS4x8Decode.java | 38 ++++++------ .../rans/rans4x8/RANS4x8Encode.java | 61 +++++++++---------- .../rans/ransnx16/RANSNx16Decode.java | 22 +++---- .../rans/ransnx16/RANSNx16Encode.java | 36 +++++------ 4 files changed, 78 insertions(+), 79 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index ea243acd8d..0b6b4e1cad 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -146,25 +146,25 @@ private ByteBuffer uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuf int i1 = isz4; int i2 = 2 * isz4; int i7 = 3 * isz4; - int l0 = 0; - int l1 = 0; - int l2 = 0; - int l7 = 0; + byte l0 = 0; + byte l1 = 0; + byte l2 = 0; + byte l7 = 0; for (; i0 < isz4; i0++, i1++, i2++, i7++) { - final int c0 = 0xFF & D[l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; - final int c1 = 0xFF & D[l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; - final int c2 = 0xFF & D[l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + final byte c0 = D[0xFF & l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; + final byte c1 = D[0xFF & l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; + final byte c2 = D[0xFF & l2].reverseLookup[Utils.RANSGetCumulativeFrequency(rans2, Constants.TOTAL_FREQ_SHIFT)]; + final byte c7 = D[0xFF & l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; - outBuffer.put(i0, (byte) c0); - outBuffer.put(i1, (byte) c1); - outBuffer.put(i2, (byte) c2); - outBuffer.put(i7, (byte) c7); + outBuffer.put(i0, c0); + outBuffer.put(i1, c1); + outBuffer.put(i2, c2); + outBuffer.put(i7, c7); - rans0 = syms[l0][c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); - rans1 = syms[l1][c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); - rans2 = syms[l2][c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); - rans7 = syms[l7][c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); + rans0 = syms[0xFF & l0][0xFF & c0].advanceSymbolStep(rans0, Constants.TOTAL_FREQ_SHIFT); + rans1 = syms[0xFF & l1][0xFF & c1].advanceSymbolStep(rans1, Constants.TOTAL_FREQ_SHIFT); + rans2 = syms[0xFF & l2][0xFF & c2].advanceSymbolStep(rans2, Constants.TOTAL_FREQ_SHIFT); + rans7 = syms[0xFF & l7][0xFF & c7].advanceSymbolStep(rans7, Constants.TOTAL_FREQ_SHIFT); rans0 = Utils.RANSDecodeRenormalize4x8(rans0, inBuffer); rans1 = Utils.RANSDecodeRenormalize4x8(rans1, inBuffer); @@ -179,9 +179,9 @@ private ByteBuffer uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuf // Remainder for (; i7 < out_sz; i7++) { - final int c7 = 0xFF & D[l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; - outBuffer.put(i7, (byte) c7); - rans7 = syms[l7][c7].advanceSymbol4x8(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); + final byte c7 = D[0xFF & l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; + outBuffer.put(i7, c7); + rans7 = syms[0xFF & l7][0xFF & c7].advanceSymbol4x8(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); l7 = c7; } return outBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 1ddd5d1428..2d58a2988e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -88,15 +88,15 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { break; } for (i = (in_size & ~3); i > 0; i -= 4) { - final int c3 = 0xFF & inBuffer.get(i - 1); - final int c2 = 0xFF & inBuffer.get(i - 2); - final int c1 = 0xFF & inBuffer.get(i - 3); - final int c0 = 0xFF & inBuffer.get(i - 4); - - rans3 = syms[c3].putSymbol4x8(rans3, ptr); - rans2 = syms[c2].putSymbol4x8(rans2, ptr); - rans1 = syms[c1].putSymbol4x8(rans1, ptr); - rans0 = syms[c0].putSymbol4x8(rans0, ptr); + final byte c3 = inBuffer.get(i - 1); + final byte c2 = inBuffer.get(i - 2); + final byte c1 = inBuffer.get(i - 3); + final byte c0 = inBuffer.get(i - 4); + + rans3 = syms[0xFF & c3].putSymbol4x8(rans3, ptr); + rans2 = syms[0xFF & c2].putSymbol4x8(rans2, ptr); + rans1 = syms[0xFF & c1].putSymbol4x8(rans1, ptr); + rans0 = syms[0xFF & c0].putSymbol4x8(rans0, ptr); } ptr.putInt((int) rans3); @@ -152,38 +152,37 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { int i2 = 3 * isz4 - 2; int i3 = 4 * isz4 - 2; - int l0 = 0; + byte l0 = 0; if (i0 + 1 >= 0) { - l0 = 0xFF & inBuffer.get(i0 + 1); + l0 = inBuffer.get(i0 + 1); } - int l1 = 0; + byte l1 = 0; if (i1 + 1 >= 0) { - l1 = 0xFF & inBuffer.get(i1 + 1); + l1 = inBuffer.get(i1 + 1); } - int l2 = 0; + byte l2 = 0; if (i2 + 1 >= 0) { - l2 = 0xFF & inBuffer.get(i2 + 1); + l2 = inBuffer.get(i2 + 1); } - int l3; // Deal with the remainder - l3 = 0xFF & inBuffer.get(in_size - 1); + byte l3 = inBuffer.get(in_size - 1); for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { - final int c3 = 0xFF & inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); + final byte c3 = inBuffer.get(i3); + rans3 = syms[0xFF & c3][0xFF & l3].putSymbol4x8(rans3, ptr); l3 = c3; } for (; i0 >= 0; i0--, i1--, i2--, i3--) { - final int c0 = 0xFF & inBuffer.get(i0); - final int c1 = 0xFF & inBuffer.get(i1); - final int c2 = 0xFF & inBuffer.get(i2); - final int c3 = 0xFF & inBuffer.get(i3); + final byte c0 = inBuffer.get(i0); + final byte c1 = inBuffer.get(i1); + final byte c2 = inBuffer.get(i2); + final byte c3 = inBuffer.get(i3); - rans3 = syms[c3][l3].putSymbol4x8(rans3, ptr); - rans2 = syms[c2][l2].putSymbol4x8(rans2, ptr); - rans1 = syms[c1][l1].putSymbol4x8(rans1, ptr); - rans0 = syms[c0][l0].putSymbol4x8(rans0, ptr); + rans3 = syms[0xFF & c3][0xFF & l3].putSymbol4x8(rans3, ptr); + rans2 = syms[0xFF & c2][0xFF & l2].putSymbol4x8(rans2, ptr); + rans1 = syms[0xFF & c1][0xFF & l1].putSymbol4x8(rans1, ptr); + rans0 = syms[0xFF & c0][0xFF & l0].putSymbol4x8(rans0, ptr); l0 = c0; l1 = c1; @@ -191,10 +190,10 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { l3 = c3; } - rans3 = syms[0][l3].putSymbol4x8(rans3, ptr); - rans2 = syms[0][l2].putSymbol4x8(rans2, ptr); - rans1 = syms[0][l1].putSymbol4x8(rans1, ptr); - rans0 = syms[0][l0].putSymbol4x8(rans0, ptr); + rans3 = syms[0][0xFF & l3].putSymbol4x8(rans3, ptr); + rans2 = syms[0][0xFF & l2].putSymbol4x8(rans2, ptr); + rans1 = syms[0][0xFF & l1].putSymbol4x8(rans1, ptr); + rans0 = syms[0][0xFF & l0].putSymbol4x8(rans0, ptr); ptr.order(ByteOrder.BIG_ENDIAN); ptr.putInt((int) rans3); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index f5b904ade5..1c0fd71b36 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -92,7 +92,7 @@ private ByteBuffer uncompressOrder0WayN( final long[] rans = new long[Nway]; // symbols is the array of decoded symbols - final int[] symbols = new int[Nway]; + final byte[] symbols = new byte[Nway]; for (int r=0; r0){ - int remainingSymbol = 0xFF & D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans[reverseIndex], Constants.TOTAL_FREQ_SHIFT)]; - syms[remainingSymbol].advanceSymbolNx16(rans[reverseIndex], inBuffer, Constants.TOTAL_FREQ_SHIFT); - outBuffer.put((byte) remainingSymbol); + byte remainingSymbol = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans[reverseIndex], Constants.TOTAL_FREQ_SHIFT)]; + syms[0xFF & remainingSymbol].advanceSymbolNx16(rans[reverseIndex], inBuffer, Constants.TOTAL_FREQ_SHIFT); + outBuffer.put(remainingSymbol); remSize --; reverseIndex ++; } @@ -349,14 +349,14 @@ private ByteBuffer decodeRLE(final ByteBuffer inBuffer , final int[] rleSymbols, ByteBuffer outBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength); int j = 0; for(int i = 0; j< uncompressedRLEOutputLength; i++){ - int sym = inBuffer.get(i) & 0xFF; - if (rleSymbols[sym]!=0){ + byte sym = inBuffer.get(i); + if (rleSymbols[sym & 0xFF]!=0){ int run = Utils.readUint7(uncompressedRLEMetaData); for (int r=0; r<= run; r++){ - outBuffer.put(j++, (byte) sym); + outBuffer.put(j++, sym); } }else { - outBuffer.put(j++, (byte) sym); + outBuffer.put(j++, sym); } } return outBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 6776bebe90..f621d1b670 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -112,7 +112,7 @@ private ByteBuffer compressOrder0WayN ( final int inputSize = inBuffer.remaining(); final ByteBuffer ptr = cp.slice(); final long[] rans = new long[Nway]; - final int[] symbol = new int[Nway]; + final byte[] symbol = new byte[Nway]; for (int r=0; r= 0; r--){ // encode using Nway parallel rans states. Nway = 4 or 32 - symbol[r] = 0xFF & inBuffer.get(i - (Nway - r)); - rans[r] = ransEncodingSymbols[symbol[r]].putSymbolNx16(rans[r], ptr); + symbol[r] = inBuffer.get(i - (Nway - r)); + rans[r] = ransEncodingSymbols[0xFF & symbol[r]].putSymbolNx16(rans[r], ptr); } } for (int i=Nway-1; i>=0; i--){ @@ -241,8 +241,8 @@ private ByteBuffer compressOrder1WayN ( // For Nway = 32, division by 32 is the same as right shift by 5 bits final int interleaveSize = (Nway == 4) ? inputSize >> 2: inputSize >> 5; final int[] interleaveStreamIndex = new int[Nway]; - final int[] symbol = new int[Nway]; - final int[] context = new int[Nway]; + final byte[] symbol = new byte[Nway]; + final byte[] context = new byte[Nway]; for (int r=0; r= 0) & (r!= Nway-1)){ - symbol[r] = 0xFF & inBuffer.get(interleaveStreamIndex[r] + 1); + symbol[r] = inBuffer.get(interleaveStreamIndex[r] + 1); } if ( r == Nway-1 ){ - symbol[r] = 0xFF & inBuffer.get(inputSize - 1); + symbol[r] = inBuffer.get(inputSize - 1); } } @@ -264,15 +264,15 @@ private ByteBuffer compressOrder1WayN ( interleaveStreamIndex[Nway - 1] = inputSize - 2; interleaveStreamIndex[Nway - 1] > Nway * interleaveSize - 2 && interleaveStreamIndex[Nway - 1] >= 0; interleaveStreamIndex[Nway - 1]-- ) { - context[Nway - 1] = 0xFF & inBuffer.get(interleaveStreamIndex[Nway - 1]); - rans[Nway - 1] = ransEncodingSymbols[context[Nway - 1]][symbol[Nway - 1]].putSymbolNx16(rans[Nway - 1], ptr); + context[Nway - 1] = inBuffer.get(interleaveStreamIndex[Nway - 1]); + rans[Nway - 1] = ransEncodingSymbols[0xFF & context[Nway - 1]][0xFF & symbol[Nway - 1]].putSymbolNx16(rans[Nway - 1], ptr); symbol[Nway - 1] = context[Nway - 1]; } while (interleaveStreamIndex[0] >= 0) { for (int r=0; r Date: Mon, 25 Jul 2022 16:32:02 -0400 Subject: [PATCH 33/76] rename getInterleaveSize to getNumInterleavedRANSStates in RANSNx16Params --- .../cram/compression/rans/ransnx16/RANSNx16Decode.java | 4 ++-- .../cram/compression/rans/ransnx16/RANSNx16Encode.java | 6 +++--- .../cram/compression/rans/ransnx16/RANSNx16Params.java | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 1c0fd71b36..4bcdf86f2e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -86,7 +86,7 @@ private ByteBuffer uncompressOrder0WayN( //TODO: remove this temporary variable aliasing/staging final ArithmeticDecoder D = getD()[0]; final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; - final int Nway = ransNx16Params.getInterleaveSize(); + final int Nway = ransNx16Params.getNumInterleavedRANSStates(); // Nway parallel rans states. Nway = 4 or 32 final long[] rans = new long[Nway]; @@ -171,7 +171,7 @@ private ByteBuffer uncompressOrder1WayN( inBuffer.order(ByteOrder.LITTLE_ENDIAN); // Nway parallel rans states. Nway = 4 or 32 - final int Nway = ransNx16Params.getInterleaveSize(); + final int Nway = ransNx16Params.getNumInterleavedRANSStates(); final long[] rans = new long[Nway]; final int[] interleaveStreamIndex = new int[Nway]; final int[] context = new int[Nway]; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index f621d1b670..462e641e74 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -106,7 +106,7 @@ private ByteBuffer compressOrder0WayN ( //TODO: tmp staging glue final RANSEncodingSymbol[] ransEncodingSymbols = getEncodingSymbols()[0]; - final int Nway = ransNx16Params.getInterleaveSize(); + final int Nway = ransNx16Params.getNumInterleavedRANSStates(); final int compressedDataSize; final int inputSize = inBuffer.remaining(); @@ -165,7 +165,7 @@ private ByteBuffer compressOrder1WayN ( final ByteBuffer outBuffer) { initializeRANSEncoder(); final ByteBuffer cp = outBuffer.slice(); - final int[][] frequencies = buildFrequenciesOrder1(inBuffer, ransNx16Params.getInterleaveSize()); + final int[][] frequencies = buildFrequenciesOrder1(inBuffer, ransNx16Params.getNumInterleavedRANSStates()); // normalise frequencies with a variable shift calculated // using the minimum bit size that is needed to represent a frequency context array @@ -222,7 +222,7 @@ private ByteBuffer compressOrder1WayN ( //TODO: tmp staging final RANSEncodingSymbol[][] ransEncodingSymbols = getEncodingSymbols(); - final int Nway = ransNx16Params.getInterleaveSize(); + final int Nway = ransNx16Params.getNumInterleavedRANSStates(); final int inputSize = inBuffer.remaining(); final long[] rans = new long[Nway]; for (int r=0; r Date: Tue, 26 Jul 2022 15:37:22 -0400 Subject: [PATCH 34/76] RLE encode and decode works as expected for RANSNx16 Order 1 --- .../rans/ransnx16/RANSNx16Encode.java | 23 +++++++++++-------- .../samtools/cram/CRAMCodecCorpusTest.java | 11 ++++++++- .../cram/compression/rans/RansTest.java | 4 ++-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 462e641e74..7a98f751ba 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -30,6 +30,19 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN Utils.writeUint7(insize,outBuffer); } + if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE && ransNx16Params.getOrder() == RANSParams.ORDER.ONE) { + // Do this before RLE, pack etc.. + // Make sure outBuffer does not have anything except format flag and size + + // TODO: check if this still applies for Nx16 or if there is a different limit + // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 + + // First byte of the compressed output provides the order of RANS. + // So, it has to be changed to 0x00 + outBuffer.put(0,(byte) 0x00); + return compressOrder0WayN(inBuffer, new RANSNx16Params(0x00), outBuffer); + } + // TODO: Add Stripe // TODO: Add Pack @@ -51,16 +64,6 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN return outBuffer; } - if (inputBuffer.remaining() < MINIMUM__ORDER_1_SIZE && ransNx16Params.getOrder() == RANSParams.ORDER.ONE) { - // TODO: check if this still applies for Nx16 or if there is a different limit - // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 - - // First byte of the compressed output provides the order of RANS. - // So, it has to be changed to 0x00 - outBuffer.put(0,(byte) 0x00); - return compressOrder0WayN(inputBuffer, new RANSNx16Params(0x00), outBuffer); - } - switch (ransNx16Params.getOrder()) { case ZERO: return compressOrder0WayN(inputBuffer, ransNx16Params, outBuffer); diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index cde7d42e10..e9973fb5d5 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -94,7 +94,7 @@ public Object[][] getRANS4x16TestData() throws IOException { "r4x16" // htscodecs directory where the RANSNx16 compressed files reside }); - // RANS Nx16 order 0, rle flag is set + // RANS Nx16 order 0, bitflags = 0x40. rle flag is set testCases.add(new Object[] { p, ransNx16Encode, @@ -129,6 +129,15 @@ public Object[][] getRANS4x16TestData() throws IOException { new RANSNx16Params(0x05), "r4x16" // htscodecs directory where the RANSNx16 compressed files reside }); + + // RANS Nx16 order 1, bitflags = 0x41. rle flag is set + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0x41), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); }); return testCases.toArray(new Object[][]{}); } diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index ca750cba00..91c2386aa8 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -110,8 +110,8 @@ public Object[][] getRansNx16Codecs() { //RANSNx16 formatFlags(first byte) 64: Order 0, N = 4, CAT false, RLE = true {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x40)} , -// //RANSNx16 formatFlags(first byte) 65: Order 1, N = 4, CAT false, RLE = true -// {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x41)} , + //RANSNx16 formatFlags(first byte) 65: Order 1, N = 4, CAT false, RLE = true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x41)} , }; } From 1fb6800ca45ea4cf608efc4d2b3f993c993bffdc Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 11 Aug 2022 16:48:44 -0400 Subject: [PATCH 35/76] add encode and decode Pack. Add test cases for pack --- .../rans/ransnx16/RANSNx16Decode.java | 134 ++++++++++++++---- .../rans/ransnx16/RANSNx16Encode.java | 129 ++++++++++++++--- .../rans/ransnx16/RANSNx16Params.java | 3 +- .../samtools/cram/CRAMCodecCorpusTest.java | 38 +++++ .../cram/compression/rans/RansTest.java | 59 +++++++- 5 files changed, 307 insertions(+), 56 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 4bcdf86f2e..65b934ddba 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -27,10 +27,28 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { final int formatFlags = inBuffer.get() & 0xFF; final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); - // TODO: add methods to handle various flags - - // if nosz is set, then uncompressed size is not recorded. - int n_out = ransNx16Params.getNosz() ? 0 : Utils.readUint7(inBuffer); + // TODO: add methods to handle stripe + + // if nosz flag is set, then uncompressed size is not recorded. + int outSize = ransNx16Params.getNosz() ? 0 : Utils.readUint7(inBuffer); + + // if pack, get pack metadata, which will be used later to decode pack + int packDataLength = 0; + int numSymbols = 0; + int[] packMap = new int[0]; + if (ransNx16Params.getPack()){ + packDataLength = outSize; + numSymbols = inBuffer.get() & 0xFF; + + // if (numSymbols > 16 or numSymbols==0) then skip decoding Pack + if (numSymbols <= 16 & numSymbols!=0) { + packMap = new int[numSymbols]; + for (int i = 0; i < numSymbols; i++) { + packMap[i] = inBuffer.get() & 0xFF; + } + outSize = Utils.readUint7(inBuffer); + } + } // if rle, get rle metadata, which will be used later to decode rle final int uncompressedRLEMetaDataLength; @@ -39,34 +57,41 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { ByteBuffer uncompressedRLEMetaData = null; if (ransNx16Params.getRLE()){ uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); - uncompressedRLEOutputLength = n_out; - n_out = Utils.readUint7(inBuffer); + uncompressedRLEOutputLength = outSize; + outSize = Utils.readUint7(inBuffer); + // TODO: maybe move decodeRLEMeta in-line uncompressedRLEMetaData = decodeRLEMeta(inBuffer,ransNx16Params,uncompressedRLEMetaDataLength,rleSymbols); } // If CAT is set then, the input is uncompressed if (ransNx16Params.getCAT()){ - byte[] data = new byte[n_out]; - inBuffer.get( data,0, n_out); + byte[] data = new byte[outSize]; + inBuffer.get( data,0, outSize); return ByteBuffer.wrap(data); } else { - final ByteBuffer outBuffer = ByteBuffer.allocate(n_out); - switch (ransNx16Params.getOrder()){ - // TODO: remove n_out? - case ZERO: - uncompressOrder0WayN(inBuffer, outBuffer, n_out, ransNx16Params); - break; - case ONE: - uncompressOrder1WayN(inBuffer, outBuffer, n_out, ransNx16Params); - break; - default: - throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); + ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + if (outSize!=0) { + switch (ransNx16Params.getOrder()) { + case ZERO: + uncompressOrder0WayN(inBuffer, outBuffer, outSize, ransNx16Params); + break; + case ONE: + uncompressOrder1WayN(inBuffer, outBuffer, outSize, ransNx16Params); + break; + default: + throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); + } } // if rle, then decodeRLE if (ransNx16Params.getRLE() & uncompressedRLEMetaData!=null ){ - return decodeRLE(outBuffer,rleSymbols,uncompressedRLEMetaData, uncompressedRLEOutputLength); + outBuffer = decodeRLE(outBuffer,rleSymbols,uncompressedRLEMetaData, uncompressedRLEOutputLength); + } + + // if pack, then decodePack + if (ransNx16Params.getPack() & packMap.length > 0) { + outBuffer = decodePack(outBuffer, packMap, numSymbols, packDataLength); } return outBuffer; } @@ -75,7 +100,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { private ByteBuffer uncompressOrder0WayN( final ByteBuffer inBuffer, final ByteBuffer outBuffer, - final int n_out, + final int outSize, final RANSNx16Params ransNx16Params) { initializeRANSDecoder(); @@ -100,11 +125,11 @@ private ByteBuffer uncompressOrder0WayN( // size of each interleaved stream // For Nway = 4, division by 4 is the same as right shift by 2 bits // For Nway = 32, division by 32 is the same as right shift by 5 bits - final int interleaveSize = (Nway == 4) ? (n_out >> 2) : (n_out >> 5); + final int interleaveSize = (Nway == 4) ? (outSize >> 2) : (outSize >> 5); // Number of elements that don't fall into the Nway streams - int remSize = n_out - (interleaveSize * Nway); - final int out_end = n_out - remSize; + int remSize = outSize - (interleaveSize * Nway); + final int out_end = outSize - remSize; for (int i = 0; i < out_end; i += Nway) { for (int r=0; r>=1; + } + } + + // 2 bits per value + else if (numSymbols <= 4){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 4 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, (byte) packMap[v & 3]); + v >>=2; + } + } + + // 4 bits per value + else if (numSymbols <= 16){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 2 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, (byte) packMap[v & 15]); + v >>=4; + } + } + inBuffer = outBufferPack; + return inBuffer; + } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 7a98f751ba..6a1b32b764 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -18,7 +18,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN return EMPTY_BUFFER; } final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); - final int formatFlags = ransNx16Params.getFormatFlags(); + final int formatFlags = ransNx16Params.getFormatFlags() & 0xFF; outBuffer.put((byte) (formatFlags)); // one byte for formatFlags // TODO: add methods to handle various flags @@ -30,29 +30,38 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN Utils.writeUint7(insize,outBuffer); } - if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE && ransNx16Params.getOrder() == RANSParams.ORDER.ONE) { - // Do this before RLE, pack etc.. - // Make sure outBuffer does not have anything except format flag and size - - // TODO: check if this still applies for Nx16 or if there is a different limit - // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 - - // First byte of the compressed output provides the order of RANS. - // So, it has to be changed to 0x00 - outBuffer.put(0,(byte) 0x00); - return compressOrder0WayN(inBuffer, new RANSNx16Params(0x00), outBuffer); - } + // using inputBuffer as inBuffer is declared final + ByteBuffer inputBuffer = inBuffer; // TODO: Add Stripe - // TODO: Add Pack + // Pack + if (ransNx16Params.getPack()) { + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + final int inSize = inBuffer.remaining(); + for (int i = 0; i < inSize; i ++) { + F[inBuffer.get(i) & 0xFF]++; + } + int numSymbols = 0; + final int[] P = new int[Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (F[i]>0) { + P[i] = numSymbols++; + } + } + + // skip Packing if numSymbols = 0 or numSymbols > 16 + if (numSymbols !=0 & numSymbols <= 16) { + inputBuffer = encodePack(inputBuffer, outBuffer, F, P, numSymbols); + } else { + // unset pack flag in the first byte of the outBuffer + outBuffer.put(0,(byte)(outBuffer.get(0) & ~RANSNx16Params.PACK_FLAG_MASK)); + } + } - // using inputBuffer as inBuffer is declared final - // TODO: should inBuffer not be declared final? - ByteBuffer inputBuffer = inBuffer; // RLE if (ransNx16Params.getRLE()){ - inputBuffer = encodeRLE(inBuffer, ransNx16Params, outBuffer); + inputBuffer = encodeRLE(inputBuffer, ransNx16Params, outBuffer); } @@ -64,6 +73,20 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN return outBuffer; } + // if after encoding pack and rle, the inputBuffer size < 4, then use order 0 + if (inputBuffer.remaining() < MINIMUM__ORDER_1_SIZE && ransNx16Params.getOrder() == RANSParams.ORDER.ONE) { + + // set order flag to "0" in the first byte of the outBuffer + outBuffer.put(0,(byte)(outBuffer.get(0) & ~RANSNx16Params.ORDER_FLAG_MASK)); + if (inputBuffer.remaining() == 0){ + outBuffer.limit(outBuffer.position()); //TODO: check if this is correct + outBuffer.rewind(); + return outBuffer; + + } + return compressOrder0WayN(inputBuffer, new RANSNx16Params(outBuffer.get(0)), outBuffer); + } + switch (ransNx16Params.getOrder()) { case ZERO: return compressOrder0WayN(inputBuffer, ransNx16Params, outBuffer); @@ -583,4 +606,74 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar return encodedData; } + private ByteBuffer encodePack( + final ByteBuffer inBuffer , + final ByteBuffer outBuffer, + final int[] F, + final int[] P, + final int numSymbols){ + + final int inSize = inBuffer.remaining(); + + ByteBuffer data; + if (numSymbols <= 1) { + data = ByteBuffer.allocate(0); + + } else if (numSymbols <= 2) { + // 1 bit per value + int dataSize = (int) Math.ceil((double) inSize/8); + data = ByteBuffer.allocate(dataSize); + data.limit(dataSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 8 == 0) { + data.put(++j, (byte) 0); + } + data.put(j, (byte) (data.get(j) + (P[inBuffer.get(i) & 0xFF] << (i % 8)))); + } + } else if (numSymbols <= 4) { + // 2 bits per value + int dataSize = (int) Math.ceil((double) inSize/4); + data = ByteBuffer.allocate(dataSize); + data.limit(dataSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 4 == 0) { + data.put(++j, (byte) 0); + } + data.put(j, (byte) (data.get(j) + (P[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); + } + } else { + // 4 bits per value + int dataSize = (int) Math.ceil((double)inSize/2); + data = ByteBuffer.allocate(dataSize); + data.limit(dataSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 2 == 0) { + data.put(++j, (byte) 0); + } + data.put(j, (byte) (data.get(j) + (P[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); + } + } + + // write numSymbols + outBuffer.put((byte) numSymbols); + + + + int j = 0; + // TODO: What is the purpose of the variable "j"? + for(int i = 0 ; i < Constants.NUMBER_OF_SYMBOLS; i ++) { + if (F[i] > 0) { + F[i] = j++; + outBuffer.put((byte) i); + } + } + + // write the length of data + Utils.writeUint7(data.limit(), outBuffer); + return data; //pos = 0 + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index 4854ee3dd2..023b7ad06d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -18,8 +18,7 @@ public class RANSNx16Params implements RANSParams { // and entropy encoders to be combined private int formatFlags; - // To get the least significant 7 bits of format byte - private static final int FORMAT_FLAG_MASK = 0x7f; + private static final int FORMAT_FLAG_MASK = 0xFF; public RANSNx16Params(int formatFlags) { this.formatFlags = formatFlags; diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index e9973fb5d5..6343371e56 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -138,6 +138,44 @@ public Object[][] getRANS4x16TestData() throws IOException { new RANSNx16Params(0x41), "r4x16" // htscodecs directory where the RANSNx16 compressed files reside }); + + // RANS Nx16 order 0, bitflags = 0x80. pack flag is set + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0x80), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); + + // RANS Nx16 order 1, bitflags = 0x81. pack flag is set + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0x81), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); + + // RANS Nx16 order 0, bitflags = 0xC0. rle flag is set, pack flag is set + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0xC0), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); + + // RANS Nx16 order 1, bitflags = 0xC1. rle flag is set, pack flag is set + testCases.add(new Object[] { + p, + ransNx16Encode, + ransNx16Decode , + new RANSNx16Params(0xC1), + "r4x16" // htscodecs directory where the RANSNx16 compressed files reside + }); + + }); return testCases.toArray(new Object[][]{}); } diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 91c2386aa8..01030a52d7 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -106,13 +106,25 @@ public Object[][] getRansNx16Codecs() { //RANSNx16 formatFlags(first byte) 37: Order 1, N = 32, CAT true {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x25)} , -// + //RANSNx16 formatFlags(first byte) 64: Order 0, N = 4, CAT false, RLE = true {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x40)} , //RANSNx16 formatFlags(first byte) 65: Order 1, N = 4, CAT false, RLE = true {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x41)} , + //RANSNx16 formatFlags(first byte) 128: Order 0, N = 4, CAT false, RLE = false, Pack = true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x80)} , + + //RANSNx16 formatFlags(first byte) 129: Order 1, N = 4, CAT false, RLE = false, Pack = true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x81)} , + + //RANSNx16 formatFlags(first byte) 192: Order 0, N = 4, CAT false, RLE = true, Pack = true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0xC0)} , + + //RANSNx16 formatFlags(first byte) 193: Order 1, N = 4, CAT false, RLE = true, Pack = true + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0xC1)} , + }; } @@ -158,7 +170,8 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final RANS4x8Decode ransDecode, final RANS4x8Params params) { final int size = 1001; - final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,ransEncode, ransDecode,params); + final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); + final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,raw,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > 10); Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4); @@ -171,10 +184,27 @@ public void testRansNx16BuffersMeetBoundaryExpectations( final RANSNx16Decode ransDecode, final RANSNx16Params params) { final int size = 1001; - final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,ransEncode, ransDecode,params); + final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); + final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,raw,ransEncode,ransDecode,params); Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty final int FormatFlags = compressed.get(); // first byte of compressed data is the formatFlags - Assert.assertEquals(FormatFlags,params.getFormatFlags()); + raw.rewind(); + int numSym = 0; + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + final int inSize = raw.remaining(); + for (int i = 0; i < inSize; i ++) { + F[raw.get(i) & 0xFF]++; + } + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (F[i]>0) { + numSym++; + } + } + if (params.getPack() & (numSym == 0 | numSym > 16)) { + Assert.assertEquals(FormatFlags, params.getFormatFlags() & ~RANSNx16Params.PACK_FLAG_MASK); + } else { + Assert.assertEquals(FormatFlags, params.getFormatFlags()); + } // if nosz flag is not set, then the uncompressed size is recorded if (!params.getNosz()){ Assert.assertEquals(Utils.readUint7(compressed), size); @@ -206,7 +236,24 @@ public void testRansNx16Header( final ByteBuffer data = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); final ByteBuffer compressed = ransEncode.compress(data, params); // first byte of compressed data gives the formatFlags - Assert.assertEquals(compressed.get(), (byte) params.getFormatFlags()); + data.rewind(); + final int FormatFlags = compressed.get() & 0xFF; // first byte of compressed data is the formatFlags + int numSym = 0; + final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + final int inSize = data.remaining(); + for (int i = 0; i < inSize; i ++) { + F[data.get(i) & 0xFF]++; + } + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (F[i]>0) { + numSym++; + } + } + if (params.getPack() & (numSym == 0 | numSym > 16)) { + Assert.assertEquals(FormatFlags, (byte) (params.getFormatFlags() & ~RANSNx16Params.PACK_FLAG_MASK)); + } else { + Assert.assertEquals(FormatFlags, (byte) params.getFormatFlags()); + } // if nosz flag is not set, then the uncompressed size is recorded if (!params.getNosz()){ Assert.assertEquals(Utils.readUint7(compressed), size); @@ -243,11 +290,11 @@ private static void ransRoundTrip( public ByteBuffer ransBufferMeetBoundaryExpectations( final int size, + final ByteBuffer raw, final RANSEncode ransEncode, final RANSDecode ransDecode, final RANSParams params){ // helper method for Boundary Expectations test - final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); final ByteBuffer compressed = ransEncode.compress(raw, params); final ByteBuffer uncompressed = ransDecode.uncompress(compressed); Assert.assertFalse(compressed.hasRemaining()); From d79a4cf2ce2116ae0ea91311c73583ad233d6a7d Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 16 Aug 2022 12:20:15 -0400 Subject: [PATCH 36/76] rename variable for better readability --- .../rans/ransnx16/RANSNx16Decode.java | 22 +++++----- .../rans/ransnx16/RANSNx16Encode.java | 41 ++++++++----------- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 65b934ddba..ee92510b27 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -32,19 +32,19 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // if nosz flag is set, then uncompressed size is not recorded. int outSize = ransNx16Params.getNosz() ? 0 : Utils.readUint7(inBuffer); - // if pack, get pack metadata, which will be used later to decode pack + // if pack, get pack metadata, which will be used later to decode packed data int packDataLength = 0; int numSymbols = 0; - int[] packMap = new int[0]; + int[] packMappingTable = new int[0]; if (ransNx16Params.getPack()){ packDataLength = outSize; numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0) then skip decoding Pack if (numSymbols <= 16 & numSymbols!=0) { - packMap = new int[numSymbols]; + packMappingTable = new int[numSymbols]; for (int i = 0; i < numSymbols; i++) { - packMap[i] = inBuffer.get() & 0xFF; + packMappingTable[i] = inBuffer.get() & 0xFF; } outSize = Utils.readUint7(inBuffer); } @@ -90,8 +90,8 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { } // if pack, then decodePack - if (ransNx16Params.getPack() & packMap.length > 0) { - outBuffer = decodePack(outBuffer, packMap, numSymbols, packDataLength); + if (ransNx16Params.getPack() & packMappingTable.length > 0) { + outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } return outBuffer; } @@ -388,13 +388,13 @@ private ByteBuffer decodeRLE(ByteBuffer inBuffer , final int[] rleSymbols, final return inBuffer; } - private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMap, int numSymbols, int uncompressedPackOutputLength) { + private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMappingTable, int numSymbols, int uncompressedPackOutputLength) { ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); int j = 0; if (numSymbols <= 1) { for (int i=0; i < uncompressedPackOutputLength; i++){ - outBufferPack.put(i, (byte) packMap[0]); + outBufferPack.put(i, (byte) packMappingTable[0]); } } @@ -405,7 +405,7 @@ else if (numSymbols <= 2) { if (i % 8 == 0){ v = inBuffer.get(j++); } - outBufferPack.put(i, (byte) packMap[v & 1]); + outBufferPack.put(i, (byte) packMappingTable[v & 1]); v >>=1; } } @@ -417,7 +417,7 @@ else if (numSymbols <= 4){ if (i % 4 == 0){ v = inBuffer.get(j++); } - outBufferPack.put(i, (byte) packMap[v & 3]); + outBufferPack.put(i, (byte) packMappingTable[v & 3]); v >>=2; } } @@ -429,7 +429,7 @@ else if (numSymbols <= 16){ if (i % 2 == 0){ v = inBuffer.get(j++); } - outBufferPack.put(i, (byte) packMap[v & 15]); + outBufferPack.put(i, (byte) packMappingTable[v & 15]); v >>=4; } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 6a1b32b764..78b6c9540e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -37,22 +37,22 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // Pack if (ransNx16Params.getPack()) { - final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + final int[] frequencyTable = new int[Constants.NUMBER_OF_SYMBOLS]; final int inSize = inBuffer.remaining(); for (int i = 0; i < inSize; i ++) { - F[inBuffer.get(i) & 0xFF]++; + frequencyTable[inBuffer.get(i) & 0xFF]++; } int numSymbols = 0; - final int[] P = new int[Constants.NUMBER_OF_SYMBOLS]; + final int[] packMappingTable = new int[Constants.NUMBER_OF_SYMBOLS]; for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - if (F[i]>0) { - P[i] = numSymbols++; + if (frequencyTable[i]>0) { + packMappingTable[i] = numSymbols++; } } // skip Packing if numSymbols = 0 or numSymbols > 16 if (numSymbols !=0 & numSymbols <= 16) { - inputBuffer = encodePack(inputBuffer, outBuffer, F, P, numSymbols); + inputBuffer = encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); } else { // unset pack flag in the first byte of the outBuffer outBuffer.put(0,(byte)(outBuffer.get(0) & ~RANSNx16Params.PACK_FLAG_MASK)); @@ -609,71 +609,64 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar private ByteBuffer encodePack( final ByteBuffer inBuffer , final ByteBuffer outBuffer, - final int[] F, - final int[] P, + final int[] frequencyTable, + final int[] packMappingTable, final int numSymbols){ - final int inSize = inBuffer.remaining(); - ByteBuffer data; if (numSymbols <= 1) { data = ByteBuffer.allocate(0); - } else if (numSymbols <= 2) { + // 1 bit per value int dataSize = (int) Math.ceil((double) inSize/8); data = ByteBuffer.allocate(dataSize); - data.limit(dataSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 8 == 0) { data.put(++j, (byte) 0); } - data.put(j, (byte) (data.get(j) + (P[inBuffer.get(i) & 0xFF] << (i % 8)))); + data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); } } else if (numSymbols <= 4) { + // 2 bits per value int dataSize = (int) Math.ceil((double) inSize/4); data = ByteBuffer.allocate(dataSize); - data.limit(dataSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 4 == 0) { data.put(++j, (byte) 0); } - data.put(j, (byte) (data.get(j) + (P[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); + data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); } } else { + // 4 bits per value int dataSize = (int) Math.ceil((double)inSize/2); data = ByteBuffer.allocate(dataSize); - data.limit(dataSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 2 == 0) { data.put(++j, (byte) 0); } - data.put(j, (byte) (data.get(j) + (P[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); + data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); } } // write numSymbols outBuffer.put((byte) numSymbols); - - - int j = 0; - // TODO: What is the purpose of the variable "j"? + // write mapping table "packMappingTable" that converts mapped value to original symbol for(int i = 0 ; i < Constants.NUMBER_OF_SYMBOLS; i ++) { - if (F[i] > 0) { - F[i] = j++; + if (frequencyTable[i] > 0) { outBuffer.put((byte) i); } } // write the length of data Utils.writeUint7(data.limit(), outBuffer); - return data; //pos = 0 + return data; // Here position = 0 since we have always accessed the data buffer using index } } \ No newline at end of file From 68994c0cca65dbb5a9cd488bc1b36eb21b870bda Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 19 Aug 2022 15:08:23 -0400 Subject: [PATCH 37/76] add exception when num of distinct symbols = 0 or > 16 --- .../cram/compression/rans/ransnx16/RANSNx16Decode.java | 6 +++++- .../cram/compression/rans/ransnx16/RANSNx16Encode.java | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index ee92510b27..7384cd4ec7 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.compression.rans.ransnx16; +import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; @@ -40,13 +41,15 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { packDataLength = outSize; numSymbols = inBuffer.get() & 0xFF; - // if (numSymbols > 16 or numSymbols==0) then skip decoding Pack + // if (numSymbols > 16 or numSymbols==0), raise exception if (numSymbols <= 16 & numSymbols!=0) { packMappingTable = new int[numSymbols]; for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get() & 0xFF; } outSize = Utils.readUint7(inBuffer); + } else { + throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: " + numSymbols); } } @@ -436,4 +439,5 @@ else if (numSymbols <= 16){ inBuffer = outBufferPack; return inBuffer; } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 78b6c9540e..d4826f8a5b 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -38,9 +38,9 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // Pack if (ransNx16Params.getPack()) { final int[] frequencyTable = new int[Constants.NUMBER_OF_SYMBOLS]; - final int inSize = inBuffer.remaining(); + final int inSize = inputBuffer.remaining(); for (int i = 0; i < inSize; i ++) { - frequencyTable[inBuffer.get(i) & 0xFF]++; + frequencyTable[inputBuffer.get(i) & 0xFF]++; } int numSymbols = 0; final int[] packMappingTable = new int[Constants.NUMBER_OF_SYMBOLS]; From 8d93534e18f062418252b0d8a29bab2ea08ea98c Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 23 Aug 2022 17:58:27 -0400 Subject: [PATCH 38/76] Add Decode Stripe to RANS Nx16. Add getFormatFlags() to RANSParams --- .../cram/compression/rans/RANSParams.java | 2 + .../rans/rans4x8/RANS4x8Params.java | 4 ++ .../rans/ransnx16/RANSNx16Decode.java | 47 +++++++++++++- .../rans/ransnx16/RANSNx16Encode.java | 7 +- .../samtools/cram/CRAMCodecCorpusTest.java | 64 +++++++++++++++---- 5 files changed, 107 insertions(+), 17 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java index ba87594e4e..d664b24c3a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java @@ -14,6 +14,8 @@ public static ORDER fromInt(final int orderValue) { } } + int getFormatFlags(); + ORDER getOrder(); } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java index 223db704e2..49694977c2 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java @@ -20,4 +20,8 @@ public ORDER getOrder() { return order; } + public int getFormatFlags(){ + return order.ordinal(); + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 7384cd4ec7..fb36d3872a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -17,6 +17,10 @@ public class RANSNx16Decode extends RANSDecode { private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; public ByteBuffer uncompress(final ByteBuffer inBuffer) { + return uncompressStream(inBuffer, 0); + } + + public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -28,10 +32,13 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { final int formatFlags = inBuffer.get() & 0xFF; final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); - // TODO: add methods to handle stripe - // if nosz flag is set, then uncompressed size is not recorded. - int outSize = ransNx16Params.getNosz() ? 0 : Utils.readUint7(inBuffer); + outSize = ransNx16Params.getNosz() ? outSize : Utils.readUint7(inBuffer); + + // if stripe, then decodeStripe + if (ransNx16Params.getStripe()) { + return decodeStripe(inBuffer, outSize); + } // if pack, get pack metadata, which will be used later to decode packed data int packDataLength = 0; @@ -440,4 +447,38 @@ else if (numSymbols <= 16){ return inBuffer; } + private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ + + final int numInterleaveStreams = inBuffer.get() & 0xFF; + + // retrieve lengths of compressed interleaved streams + int[] clen = new int[numInterleaveStreams]; + for ( int j=0; j j){ + ulen[j]++; + } + + T[j] = uncompressStream(inBuffer, ulen[j]); + } + + // Transpose + ByteBuffer out = ByteBuffer.allocate(outSize); + for (int j = 0; j Date: Thu, 25 Aug 2022 22:17:44 -0400 Subject: [PATCH 39/76] Add test for Encoding when Stripe Flag is set --- .../samtools/cram/CRAMCodecCorpusTest.java | 12 ++-- .../cram/compression/rans/RansTest.java | 55 +++++++++++++++++-- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java index 1d9ce98904..e7221e37af 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java @@ -31,7 +31,7 @@ import static htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params.STRIPE_FLAG_MASK; /** - * HTSCodecs test data is kept in a separate repository, currently at https://github.com/jkbonfield/htscodecs-corpus + * HTSCodecs test data is kept in a separate repository, currently at https://github.com/jkbonfield/htscodecs-corpus, * so it can be shared across htslib/samtools/htsjdk. */ public class CRAMCodecCorpusTest extends HtsjdkTest { @@ -230,18 +230,18 @@ public void testRANSRoundTrip( // If Stripe Flag is set, skip the round trip test as encoding is not implemented for this case. // TODO: Assert raise Exception - System.out.println(String.format("Stripe Flag is set. Skipping testRANSRoundTrip for file: %s. " + + System.out.printf("Stripe Flag is set. Skipping testRANSRoundTrip for file: %s. " + "Format Flags: %s . The current RANSNx16 implementation does not " + - "support encoding when Stripe Flag is set", inputTestDataPath.toString(), params.getFormatFlags())); + "support encoding when Stripe Flag is set%n", inputTestDataPath.toString(), params.getFormatFlags()); } else { final ByteBuffer compressedBytes = ransEncode.compress(uncompressedBytes, params); uncompressedBytes.rewind(); - System.out.println(String.format("filename:%s %s Uncompressed: (%,d) Compressed: (%,d)", + System.out.printf("filename:%s %s Uncompressed: (%,d) Compressed: (%,d)%n", inputTestDataPath.getFileName(), params.toString(), uncompressedBytes.remaining(), - compressedBytes.remaining())); + compressedBytes.remaining()); Assert.assertEquals(ransDecode.uncompress(compressedBytes), uncompressedBytes); } } @@ -254,7 +254,7 @@ public void testRANSRoundTrip( "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.") public void testRANSPreCompressed( final Path inputTestDataPath, - final RANSEncode ransEncode, + final RANSEncode unused, final RANSDecode ransDecode, final RANSParams params, final String CompressedDirname) throws IOException { diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 01030a52d7..c8721f669f 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans; import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; @@ -12,7 +13,6 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; - import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Random; @@ -23,7 +23,7 @@ * Created by vadim on 22/04/2015. */ public class RansTest extends HtsjdkTest { - private Random random = new Random(TestUtil.RANDOM_SEED); + private final Random random = new Random(TestUtil.RANDOM_SEED); // Since some of our test cases use very large byte arrays, so enclose them in a wrapper class since // otherwise IntelliJ serializes them to strings for display in the test output, which is *super*-slow. @@ -37,9 +37,14 @@ public String toString() { } } + public Object[][] getRansEmptyTestData() { + return new Object[][]{ + { new TestDataEnvelope(new byte[]{}) }, + }; + } + public Object[][] getRansTestData() { return new Object[][] { - { new TestDataEnvelope(new byte[]{}) }, { new TestDataEnvelope(new byte[] {0}) }, { new TestDataEnvelope(new byte[] {0, 1}) }, { new TestDataEnvelope(new byte[] {0, 1, 2}) }, @@ -128,6 +133,26 @@ public Object[][] getRansNx16Codecs() { }; } + public Object[][] getRansNx16DecodeOnlyCodecs() { + final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); + final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); + return new Object[][]{ + + //RANSNx16 formatFlags(first byte) 8: Order 0, N = 4, CAT false, RLE = false, Pack = false, Stripe = True + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x08)}, + + //RANSNx16 formatFlags(first byte) 9: Order 1, N = 4, CAT false, RLE = false, Pack = false, Stripe = True + {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x09)} + }; + } + + @DataProvider(name="RansNx16DecodeOnlyAndData") + public Object[][] getRansNx16DecodeOnlyAndData() { + + // this data provider provides all the testdata except empty input for RANS Nx16 codec + return TestNGUtils.cartesianProduct(getRansNx16DecodeOnlyCodecs(), getRansTestData()); + } + public Object[][] getAllRansCodecs() { // concatenate RANS4x8 and RANSNx16 codecs return Stream.concat(Arrays.stream(getRans4x8Codecs()), Arrays.stream(getRansNx16Codecs())) @@ -136,13 +161,18 @@ public Object[][] getAllRansCodecs() { @DataProvider(name="allRansAndData") public Object[][] getAllRansAndData() { + // this data provider provides all the testdata for all of RANS codecs // params: RANSEncode, RANSDecode, RANSParams, data - return TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansTestData()); + return Stream.concat( + Arrays.stream(TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansTestData())), + Arrays.stream(TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansEmptyTestData()))) + .toArray(Object[][]::new); } @DataProvider(name="allRansAndDataForTinySmallLarge") public Object[][] getAllRansAndDataForTinySmallLarge() { + // this data provider provides Tiny, Small and Large testdata for all of RANS codecs // params: RANSEncode, RANSDecode, RANSParams, data, lower limit, upper limit return TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansTestDataTinySmallLarge()); @@ -269,6 +299,23 @@ public void testRoundTrip( ransRoundTrip(ransEncode, ransDecode, params, ByteBuffer.wrap(td.testArray)); } + @Test( + dataProvider = "RansNx16DecodeOnlyAndData", + expectedExceptions = { CRAMException.class }, + expectedExceptionsMessageRegExp = "RANSNx16 Encoding with Stripe Flag is not implemented.") + public void testRansNx16EncodeStripe( + final RANSNx16Encode ransEncode, + final RANSNx16Decode unused, + final RANSNx16Params params, + final TestDataEnvelope td) { + + // When td is not Empty, Encoding with Stripe Flag should throw an Exception + // as Encode Stripe is not implemented + final ByteBuffer compressed = ransEncode.compress(ByteBuffer.wrap(td.testArray), params); + } + + // TODO: Add Test to DecodePack with nsym > 16 + private static void ransRoundTrip( final RANSEncode ransEncode, final RANSDecode ransDecode, From 14df7b233224ba7588bbdbade44f7d506734239d Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 26 Aug 2022 10:30:52 -0400 Subject: [PATCH 40/76] Fix Spot Bugs warn - Use && for logical and --- .../cram/compression/rans/ransnx16/RANSNx16Decode.java | 6 +++--- .../cram/compression/rans/ransnx16/RANSNx16Encode.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index fb36d3872a..7cb7861119 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -49,7 +49,7 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0), raise exception - if (numSymbols <= 16 & numSymbols!=0) { + if (numSymbols <= 16 && numSymbols!=0) { packMappingTable = new int[numSymbols]; for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get() & 0xFF; @@ -95,12 +95,12 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { } // if rle, then decodeRLE - if (ransNx16Params.getRLE() & uncompressedRLEMetaData!=null ){ + if (ransNx16Params.getRLE() && uncompressedRLEMetaData!=null ){ outBuffer = decodeRLE(outBuffer,rleSymbols,uncompressedRLEMetaData, uncompressedRLEOutputLength); } // if pack, then decodePack - if (ransNx16Params.getPack() & packMappingTable.length > 0) { + if (ransNx16Params.getPack() && packMappingTable.length > 0) { outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } return outBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index d6cf827726..ba7fe62859 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -56,7 +56,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN } // skip Packing if numSymbols = 0 or numSymbols > 16 - if (numSymbols !=0 & numSymbols <= 16) { + if (numSymbols !=0 && numSymbols <= 16) { inputBuffer = encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); } else { // unset pack flag in the first byte of the outBuffer @@ -282,7 +282,7 @@ private ByteBuffer compressOrder1WayN ( //intialize symbol symbol[r]=0; - if((interleaveStreamIndex[r]+1 >= 0) & (r!= Nway-1)){ + if((interleaveStreamIndex[r]+1 >= 0) && (r!= Nway-1)){ symbol[r] = inBuffer.get(interleaveStreamIndex[r] + 1); } if ( r == Nway-1 ){ From 7f7e613de7cad90f73aaaef7434e52fbe2e20b74 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 6 Sep 2022 12:50:42 -0400 Subject: [PATCH 41/76] Addressing the feedback from Aug 30, 2022 --- .../compression/rans/ArithmeticDecoder.java | 10 +- .../rans/rans4x8/RANS4x8Encode.java | 8 +- .../rans/rans4x8/RANS4x8Params.java | 2 +- .../rans/ransnx16/RANSNx16Decode.java | 3 +- .../htsjdk/samtools/cram/CRAMCodecCorpus.java | 45 -------- ...ecCorpusTest.java => RANSInteropTest.java} | 90 ++++++---------- .../samtools/cram/RANSInteropTestUtils.java | 43 ++++++++ .../samtools/cram/InteropTest/RANS/qvar | 100 ++++++++++++++++++ .../cram/InteropTest/RANS/r4x16/qvar.0 | Bin 0 -> 32987 bytes .../cram/InteropTest/RANS/r4x8/qvar.0 | Bin 0 -> 32997 bytes 10 files changed, 186 insertions(+), 115 deletions(-) delete mode 100644 src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java rename src/test/java/htsjdk/samtools/cram/{CRAMCodecCorpusTest.java => RANSInteropTest.java} (76%) create mode 100644 src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java create mode 100644 src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar create mode 100644 src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x16/qvar.0 create mode 100644 src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x8/qvar.0 diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java index 6988fb8df6..e8c128a6e2 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java @@ -24,24 +24,22 @@ */ package htsjdk.samtools.cram.compression.rans; -import static htsjdk.samtools.cram.compression.rans.Constants.NUMBER_OF_SYMBOLS; - final public class ArithmeticDecoder { - public final int[] freq = new int[NUMBER_OF_SYMBOLS]; - public final int[] cumulativeFreq = new int[NUMBER_OF_SYMBOLS]; + public final int[] freq = new int[Constants.NUMBER_OF_SYMBOLS]; + public final int[] cumulativeFreq = new int[Constants.NUMBER_OF_SYMBOLS]; // reverse lookup table public byte[] reverseLookup = new byte[Constants.TOTAL_FREQ]; public ArithmeticDecoder() { - for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { freq[i] = 0; cumulativeFreq[i] = 0; } } public void reset() { - for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { freq[i] = 0; cumulativeFreq[i] = 0; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 2d58a2988e..35b11fc44c 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -9,8 +9,6 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; -import static htsjdk.samtools.cram.compression.rans.Constants.NUMBER_OF_SYMBOLS; - public class RANS4x8Encode extends RANSEncode { private static final int ORDER_BYTE_LENGTH = 1; private static final int COMPRESSED_BYTE_LENGTH = 4; @@ -394,7 +392,7 @@ private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { // Note: maximum possible rle = 254 // rle requires atmost 1 byte if (rle == 0 && j != 0 && F[j - 1] != 0) { - for (rle = j + 1; rle < NUMBER_OF_SYMBOLS && F[rle] != 0; rle++) + for (rle = j + 1; rle < Constants.NUMBER_OF_SYMBOLS && F[rle] != 0; rle++) ; rle -= j + 1; cp.put((byte) rle); @@ -442,7 +440,7 @@ private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) // FIXME: could use order-0 statistics to observe which alphabet // symbols are present and base RLE on that ordering instead. if (i != 0 && T[i - 1] != 0) { - for (rle_i = i + 1; rle_i < NUMBER_OF_SYMBOLS && T[rle_i] != 0; rle_i++) + for (rle_i = i + 1; rle_i < Constants.NUMBER_OF_SYMBOLS && T[rle_i] != 0; rle_i++) ; rle_i -= i + 1; cp.put((byte) rle_i); @@ -460,7 +458,7 @@ private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) } else { cp.put((byte) j); if (rle_j == 0 && j != 0 && F_i_[j - 1] != 0) { - for (rle_j = j + 1; rle_j < NUMBER_OF_SYMBOLS && F_i_[rle_j] != 0; rle_j++) + for (rle_j = j + 1; rle_j < Constants.NUMBER_OF_SYMBOLS && F_i_[rle_j] != 0; rle_j++) ; rle_j -= j + 1; cp.put((byte) rle_j); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java index 49694977c2..024ae37d53 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java @@ -4,7 +4,7 @@ public class RANS4x8Params implements RANSParams { - private ORDER order; + private final ORDER order; public RANS4x8Params(ORDER order) { this.order = order; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 7cb7861119..d784c9413c 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -359,10 +359,9 @@ private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ra uncompressedRLEMetaData = ByteBuffer.wrap(uncompressedRLEMetaDataArray); } else { compressedRLEMetaDataLength = Utils.readUint7(inBuffer); - ByteBuffer compressedRLEMetaData = ByteBuffer.allocate(compressedRLEMetaDataLength); byte[] compressedRLEMetaDataArray = new byte[compressedRLEMetaDataLength]; inBuffer.get(compressedRLEMetaDataArray,0,compressedRLEMetaDataLength); - compressedRLEMetaData = ByteBuffer.wrap(compressedRLEMetaDataArray); + ByteBuffer compressedRLEMetaData = ByteBuffer.wrap(compressedRLEMetaDataArray); compressedRLEMetaData.order(ByteOrder.LITTLE_ENDIAN); uncompressedRLEMetaData = ByteBuffer.allocate(uncompressedRLEMetaDataLength / 2); diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java b/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java deleted file mode 100644 index 2283650256..0000000000 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpus.java +++ /dev/null @@ -1,45 +0,0 @@ -package htsjdk.samtools.cram; - -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - -/** - * HTSCodecs test data is kept in a separate repository, currently at https://github.com/jkbonfield/htscodecs-corpus - * so it can be shared across htslib/samtools/htsjdk. - */ -public class CRAMCodecCorpus { - public static final String HTSCODECS_TEST_DATA_ENV = "HTSCODECS_TEST_DATA"; - - /** - * @return true if htscodecs test data is available, otherwise false - */ - public static boolean isHtsCodecsTestDataAvailable() { - final Path testDataPath = getHTSCodecsTestDataLocation(); - return Files.exists(testDataPath); - } - - /** - * @return throws a RuntimeException if the htscodecs test data repo is not available locally - */ - public static void assertHTSCodecsTestDataAvailable() { - if (!isHtsCodecsTestDataAvailable()) { - throw new RuntimeException( - String.format( - "No HTS codecs test data found." + - " The %s environment variable must be set to the location of the local hts codecs test data.", - HTSCODECS_TEST_DATA_ENV)); - } - } - - /** - * @return the name and location of the local hts codecs test data as specified by the environment - * variable HTSCODECS_TEST_DATA, or the default value of "../htscodecs-corpus" if the environment - * variable is not set - */ - public static Path getHTSCodecsTestDataLocation() { - final String htsCodecsTestLocation = System.getenv(HTSCODECS_TEST_DATA_ENV); - return htsCodecsTestLocation == null ? Paths.get("../htscodecs/tests") : Paths.get(htsCodecsTestLocation); - } - -} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java similarity index 76% rename from src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java rename to src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index e7221e37af..c345ef9f58 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMCodecCorpusTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -28,20 +28,17 @@ import java.util.List; import java.util.stream.Stream; -import static htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params.STRIPE_FLAG_MASK; - /** - * HTSCodecs test data is kept in a separate repository, currently at https://github.com/jkbonfield/htscodecs-corpus, + * Interop test data is kept in a separate repository, currently at https://github.com/samtools/htscodecs * so it can be shared across htslib/samtools/htsjdk. */ -public class CRAMCodecCorpusTest extends HtsjdkTest { +public class RANSInteropTest extends HtsjdkTest { @Test - public void testGetHTSCodecsCorpus() { - if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { + public void testGetHTSCodecsCorpus() throws SkipException{ + if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { throw new SkipException(String.format( - "No HTS codecs test data found." + - " The %s environment variable must be set to the location of the local hts codecs test data.", - CRAMCodecCorpus.HTSCODECS_TEST_DATA_ENV)); + "No RANS Interop test data found at location: %s", + RANSInteropTestUtils.INTEROP_TEST_FILES_PATH)); } } @@ -49,7 +46,9 @@ public void testGetHTSCodecsCorpus() { // RANS tests ///////////////////////////////////////////////////////////////////////////////////////////////// - //TODO: the TestDataProviders tests fail if the hts codecs corpus isn't available because + // TODO: the TestDataProviders tests fail if the hts codecs corpus isn't available. For time being, + // we fix this by adding some small test files, which would later be replaced by a more permanent + // solution like adding the tests directly from samtools/hts-codecs using git submodule // RANS4x8 codecs and testdata public Object[][] getRANS4x8TestData() throws IOException { @@ -57,7 +56,7 @@ public Object[][] getRANS4x8TestData() throws IOException { final RANS4x8Encode rans4x8Encode = new RANS4x8Encode(); final RANS4x8Decode rans4x8Decode = new RANS4x8Decode(); final List testCases = new ArrayList<>(); - getHtsCodecRANSTestFiles().stream() + getInteropRANSTestFiles() .forEach(p -> { // RANS 4x8 order 0 @@ -85,7 +84,7 @@ public Object[][] getRANS4x16TestData() throws IOException { final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); final List testCases = new ArrayList<>(); - getHtsCodecRANSTestFiles().stream() + getInteropRANSTestFiles() .forEach(p -> { // RANS Nx16 order 0, none of the bit flags are set @@ -213,12 +212,12 @@ public Object[][] getAllRansCodecs() throws IOException { description = "Roundtrip using htsjdk RANS. Compare the output with the original file" ) public void testRANSRoundTrip( final Path inputTestDataPath, - final RANSEncode ransEncode, + final RANSEncode ransEncode, final RANSDecode ransDecode, final RANSParams params, - final String unusedCompressedDirname) throws IOException { - if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { - throw new SkipException("htscodecs test data is not available locally"); + final String unusedCompressedDirname) throws IOException, SkipException { + if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { + throw new SkipException("Interop test data is not available locally"); } try (final InputStream is = Files.newInputStream(inputTestDataPath)) { @@ -226,22 +225,11 @@ public void testRANSRoundTrip( // by filtering out the embedded newlines, and then round trip through RANS and compare the // results final ByteBuffer uncompressedBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(is))); - if ((params.getFormatFlags() & STRIPE_FLAG_MASK)!=0) { - // If Stripe Flag is set, skip the round trip test as encoding is not implemented for this case. - // TODO: Assert raise Exception - System.out.printf("Stripe Flag is set. Skipping testRANSRoundTrip for file: %s. " + - "Format Flags: %s . The current RANSNx16 implementation does not " + - "support encoding when Stripe Flag is set%n", inputTestDataPath.toString(), params.getFormatFlags()); - } - else { + // If Stripe Flag is set, skip the round trip test as encoding is not implemented for this case. + if ((params.getFormatFlags() & RANSNx16Params.STRIPE_FLAG_MASK)==0) { final ByteBuffer compressedBytes = ransEncode.compress(uncompressedBytes, params); uncompressedBytes.rewind(); - System.out.printf("filename:%s %s Uncompressed: (%,d) Compressed: (%,d)%n", - inputTestDataPath.getFileName(), - params.toString(), - uncompressedBytes.remaining(), - compressedBytes.remaining()); Assert.assertEquals(ransDecode.uncompress(compressedBytes), uncompressedBytes); } } @@ -253,54 +241,44 @@ public void testRANSRoundTrip( description = "Compress the original file using htsjdk RANS and compare it with the existing compressed file. " + "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.") public void testRANSPreCompressed( - final Path inputTestDataPath, - final RANSEncode unused, + final Path uncompressedInteropPath, + final RANSEncode unused, final RANSDecode ransDecode, final RANSParams params, - final String CompressedDirname) throws IOException { - if (!CRAMCodecCorpus.isHtsCodecsTestDataAvailable()) { - throw new SkipException("htscodecs test data is not available locally"); + final String compressedInteropDirName) throws IOException, SkipException { + if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { + throw new SkipException("Interop test data is not available locally"); } - final Path preCompressedDataPath = getCompressedRANSPath(CompressedDirname,inputTestDataPath, params); + final Path preCompressedInteropPath = getCompressedRANSPath(compressedInteropDirName,uncompressedInteropPath, params); - try (final InputStream inputStream = Files.newInputStream(inputTestDataPath); - final InputStream preCompressedInputStream = Files.newInputStream(preCompressedDataPath); + try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); + final InputStream preCompressedInteropStream = Files.newInputStream(preCompressedInteropPath) ) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer inputBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(inputStream))); - - final ByteBuffer preCompressedInputBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInputStream)); - - // commenting as htsjdkCompressedBytes is not used anywhere - // Use htsjdk to compress the input file from htscodecs repo -// final ByteBuffer htsjdkCompressedBytes = ransEncode.compress(inputBytes, params); -// inputBytes.rewind(); + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); -// // commenting as the comparison of compressed bytes is not needed to ensure interoperability. -// // Compare the htsjdk compressed bytes with the precompressed file from htscodecs repo -// Assert.assertEquals(htsjdkCompressedBytes, preCompressedInputBytes); + final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo - final ByteBuffer htsjdkUncompressedBytes = ransDecode.uncompress(preCompressedInputBytes); + final ByteBuffer uncompressedHtsjdkBytes = ransDecode.uncompress(preCompressedInteropBytes); // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo - Assert.assertEquals(htsjdkUncompressedBytes, inputBytes); + Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); } catch (NoSuchFileException ex){ - // if precompressed file or input file is not present - System.out.println("Skipping testRANSPrecompressed as either input file " + + throw new SkipException("Skipping testRANSPrecompressed as either input file " + "or precompressed file is missing. File Missing: " + ex.getMessage()); } } - // return a list of all RANS test data files in the htscodecs test directory - private List getHtsCodecRANSTestFiles() throws IOException { - CRAMCodecCorpus.assertHTSCodecsTestDataAvailable(); + // return a list of all RANS test data files in the InteropTest/RANS directory + private List getInteropRANSTestFiles() throws IOException, SkipException { + RANSInteropTestUtils.assertHTSCodecsTestDataAvailable(); final List paths = new ArrayList<>(); Files.newDirectoryStream( - CRAMCodecCorpus.getHTSCodecsTestDataLocation().resolve("dat"), + RANSInteropTestUtils.getInteropTestDataLocation().resolve("RANS"), path -> path.getFileName().startsWith("q4") || path.getFileName().startsWith("q8") || path.getFileName().startsWith("qvar") || diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java new file mode 100644 index 0000000000..e5e20959c8 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java @@ -0,0 +1,43 @@ +package htsjdk.samtools.cram; + +import org.testng.SkipException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Interop test data is kept in a separate repository, currently at https://github.com/samtools/htscodecs + * so it can be shared across htslib/samtools/htsjdk. + */ +public class RANSInteropTestUtils { + public static final String INTEROP_TEST_FILES_PATH = "src/test/resources/htsjdk/samtools/cram/InteropTest/"; + + /** + * @return true if interop test data is available, otherwise false + */ + public static boolean isInteropTestDataAvailable() { + final Path testDataPath = getInteropTestDataLocation(); + return Files.exists(testDataPath); + } + + /** + * @return throws a SkipException if the interop test data is not available locally + */ + public static void assertHTSCodecsTestDataAvailable() { + if (!isInteropTestDataAvailable()) { + throw new SkipException( + String.format( + "No RANS Interop test data found at location: %s", + INTEROP_TEST_FILES_PATH)); + } + } + + /** + * @return the name and location of the local interop test data as specified by the + * variable INTEROP_TEST_FILES_PATH + */ + public static Path getInteropTestDataLocation() { + return Paths.get(INTEROP_TEST_FILES_PATH); + } + +} \ No newline at end of file diff --git a/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar b/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar new file mode 100644 index 0000000000..acb3335ce0 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar @@ -0,0 +1,100 @@ +S9$#6#6%7654A5634A;$<65D67144>457$4A6774=5754>9%34>%#%4A588%A#76234=47%$4A954>7%34>9244966$3<7864A5755A687A678A7$74D79"43A5789/?671$$A775$A7735D878A<1#66$4>6724A652A67$3>7%4:561>679.6914A699D768$47%34987$5>8A6%%45A6777A66#5A68$5A7334>7815>47#24D57144>52%A663:85>575%>66734=5%4A572$5A67:87#44A6774D:544A85$$>478A561$4=5%54>4"6#$8A76$$4>666143<56%54;6#%4D66344;6624A45#4A9=72>9614#6A78#44:9D8965>5644A876?67335A9%81#%;46%45#$4A6744:63;6774$>7775>:64$967625A7675A874A;7#4565"24%61%46634%6$#35%6%4$7$#%47$%4756345727$690<345>5775D8625A6677"<4623%4>572$4<$%<5"514A57$;46%23;:54>6#3<4#6<5734$8:582$4>56134;6#$4>576<684474$3>$24>766$34A954A6832#;57124%%"64%>6634A7$7/:671$4A5%"%%A9277>6>46"%4<62"447$4A66144:9#7#4A#8144<614>6$$%34<563344<67"4<%#23;5664>6624$%$#"<$""4<"5%$$4>6#%">573244A8##2#"1#$#$2$$$#$#""$$"##"%%"#"#"##$###"#""%%$233662#%347614455#5$476345>98$#24A68345D4624A57#45:8678%666$>83A56$4D768;86624A66344A77144A67745D7$54>7525A567$$>59<#25A%:;:674<59$5A7667<;8777A:9%444@59337;9761%24A5#44A574C975A769;7725A7%6#334A76#A67$4A5633>%6$%4<56%4;5124>55124;5$#45"$#4>6234>6574A7%3A79"44A;527<:734A:785/$3498798759:B%4AB<2.@A??@<:@75:D?B::<6=6@?81#%#$:<:7#>:@BAB<@:=6565%95=@%5A749=%>87:9:7?:>6A6566?9>B?7=<<;;78C6695<859D7"98475<9>6=8:3<6=6;$@8<78%434244346"573682%;8796442$5$5856437$%338=6:269=8=5=7:1657/5242552868553<;;535418564373878828;=7838:=$87#68/72418263%54818$=:9:;7:4858-6$%4658283%8579=6;:97$42:58280<587/86;755<9=79#5687$5$45%576836687549=%7;768#%1534435%165#;;%7/78889%;475$952186887:=785<==:9=78365:9295:97<18378<7<9=8@7:7:96:<>A7;;58<7<$@$#.<%5<=:@7678$:57539B>><>877=6@5>9=$B<=:5:@><687?:?=<976C=>8=476:754/978A:7872."$$##$###480745?;4358;C$72A3961?:68<276768/78$4:?59794?><$6:C=;@A96>94.4665:7845894@86<5;=%34>9>6$9A=$;2516878A79;@%7$%49:79B45656A;68;887A:A;9?5>8>;:=A<:B;8549=8784<>>:A62/828%58=5@A2085443A:<89>5>888AE4A:A46?<88%29898D;>DB8:$@9@9;8:4:?898@6786=54@8;8>8D89;A7>6:;4739234575A<>@>88;@7@;8434668AA857:4:6<5@978A84:6%9?5??A8869<<<8A:;:::5%9=52596=7@>AA8@E;98<76A=4C=7=88@63A4$A9>6:8$>863D95:?898=?8@8666>$%4<8@>?983>88B=9<@D7<<7:9;8A6;%6A898:A44@E=:$@A?5A#$%9=%47>B=9D:@7@D:DE98993D%223=198;;<$8998998>96>89898A649@=9$69%;?A%6@433:<99;:;#98984B?>9>98587?885=A>8<#4>A?>?%>$%>9979%55#6%935@A;984$>678>5579<;6;:8889A=:A7?7A89"%A8685$>88;6:?99<@88>=>A=894>AAAB85?9955:9::::".:;=;89:?5;:<;64@<8#878988899A=8776:8>8;<%=<:#<8:A99<44@9>9;@89<@E;99@>=97@89<96<88=549<9@889:>?8@9983:432674A79>79:;:A<99%34=>;;?88989;<;8@A?1<:34;;$$19;AB8;@A;A859:?>26?68>6$6:9A<88?:9<;99:74@A=>:7;$8:98:;>>6C7=;@A<8;%9593232<6>=;;=<@9;C=5$37>;?98AA8>;5>D?96<986A7#371%8%95=6%4>8545$#5%=:88388:6$D9AB#?A<829:8A=95:>8$D;<>6$;8?84$9;A>?9855@?888:979%<8;9<;C:;8A8=DE;A8>:16$95%$23A834@#254%2!$!!"!"56%##3;8<3>B86:8;>>?A<%49@:%<5=63858=23166;%8>%A69DA?76;:9<989@>;<=4?958%58<$=73;?:@9??84443<;897@:=;8@;@B;57?:>=7?8:63=$<<9=D>7;:7@671%3:@A?:%$8<6#4@:?B:::<7<@8>7=#?8;8087%%%4%=5?6=46;9895=$56@4;99?D"6B46@>;A:?4606$58%?=<:A"6%8=29#438989:;;9:A4239=;<9A?999BA:=86$?$95$:98;A=%8;4@8>95AA4;B>A$23=??%7;?887;?;=9?A6@4;787@?:=85=@?D8?@;788?666#13@5;9;?7D47885549:$;87@9A9@<@:647@89@39%8:><=>:7@6>8<81;?36$53@5$<8"8"?54:>?<8888;A<9=8#4>>6295==;D89;7;8945"=5D7697;=9$549?9@:2<%<=67<532#4>773@648B9D;7$8="?973"669675;;6">74>8;%899A:89=<4<>:<<=@D96@89:A=::95?=8>8$#2;@59:6;@A2@#298C<8:B8986=#4%#7$%;@>:5=66;8:6B5%B8?9846974568:8767::8D:?$7@34%8$<>8=73#>>4=9<58;#;33%%$"##$:<4>;;89589$;5<$?=<09$#%$$599:%<94=8;8:>>;8>8C8873A86=99$;9@%7;241@98838;8$%@<$6898>665@>?97::9$##%=:7##!96#7<<58%898867@9784@9?:?=58D84;270%"3464?4:96%@888D3>:7:@899A?5$3;9%33<788>54;9AB5438<@B98=689;?A?98A58#87>:85=D>%88CA9#76:$8%3@>87756?;?>6544;=8:7=3;>968?8#=$;<=?A?83%7::$9623;7;$5%;6868887;?<"345;8<9#46$7%6:%%.4<67"$569799$: +:?:=56<:687%#49;:$499986A5=B8=:5<:;:65;9:?::=898#:9<9;<8;5978<:;98%;89<97#6=:;<9<978$567;8%9;899:997:9:986:9=89;9=998?8=:8:?:::96<:;3:#<9;"897:%9=85#%<897:93<9<99587;7<=9;9749#5#%6:=955#7:6$9$%%4:86:;?:=#:9:?@?@:9##9@:97:>::;66144<$7"34A6767995#7%%"4>677%5A66334A68145>582443>614>#7$4A66$44>5#%7#33>4658%44:%%$5"?76#4%%34=45134A5%5$4%8378$44=$6"$3;563434>%662#4>66$44A67"35A67%3A58$34>68244A66#4!%#85!$3:6%34A:334A:88$35A487"2A682A6$"44A67$4>B%%49?%5#"5A?8547AB:344AB8$.58%$%4>#"824A6#344A82$$24A56:86A6664A578%2<#7244;96#56$24=6"4#5>9244A55$4<4#65:6%%7%4#4=#7657#4;#724866#43>4$5714>6#8%"4;86632A67245>:65343A673!64A67"34<563%4A57$4 +S:8445967%$4;77%4>66#744>7%9/%4A59454>87347A#9245A6;@57%666344A79544A97787%>A6864$#>567#.$5>7974A4#$#4!=766#%7A76%40:>671%4A6713:56##$=56%#4<6612A67244A%62$4>%561#4A9$32>#66%34;676%64%676!"!4>76244A673"4A78345A67%3A567$44A46344A58% +#66=8$7%5<#74=244A<355<99545;"77244>%65?"445;79645;791$%;982%5=469344>9$3487644#=69245.<997$A66#$4<%62!#4>9$<%344;"<8#6#4>5674334<66#34>7954#>9$7"".<667345=89774791!4=69%344>$89434>69234;%7%34=$7!%<57%34;"98##4;7$"34A69%44;>77324;%7%24=9$34959%5=%973;2877#44%797%$%48=77%4=;77244>!7924%65:67145A799#%$344>779@445>56%$%<$45>8$?:@544>688"$#869244:67%34=$%6%%4<%6%3=572=571$4<662$4>59934>66344;59245;49244>88344<79345>67#%2%<66%79<879344;867244<6$724>57##4;69244:673%4<57%#4;67%24=69234>69244<661%34>67234<46345<89645;79344<66244>69$#44>67544<<7%4499?!#%%4>:9424><8243<9754$>797444<57;#34<<%5"3>69324A6$9344A:=%$%4<$4;6#6%4<#4"2#2 +S9:4$5?7#%4A89545=484"#:786344>%74444A<=445;<@47445AB>@557>B=%%?8$$#4$5AB?247A77#343A7754":59555A99%644=<"4$;67"44=67#9545:%669545A87645A4%!#4A563"35/?87244>46344<97$34A8%#4A69247A7975256344A875$4A<54%"#""!##!!%"!8!!%4;6796"68A666$4A79"45A=@?%35A6797#345A89647A8974%>A7::57$5=9""788$6$69A89647>49544>796#:A966224<%77%34"?6752A#8$#4>%7144A567245<793444;692$4>57"#44278#486345>"35866"3A67124A69%43$7#44A77644$58%:@87927A89647><79544>8661%4<66245< +$6=88$45<%72"54=957145>9%6344>558#44<7654!:=77445;9687>78$3#44>66244>#7##4=66$3%;7854%78544=8765>46#45>69647<77$4"$4=67$4!4144<77647>976444#=8"<%5;$6883$#$754":=77%#75965345%757667>77:#75>76$44":77644>76%#4=6813%$4$576%4;"!457$34=672458978344>6681%3487344;8"434$:97:.:":;76535"7%67637>6;8778>"%66347;986!#!663%496612<:%:245A77545<7"344=57534=956234;56244=67$4;91=66547=566244;76%4#6%6!74;57334;4245=77547>48547>776"93<56764:673"566%45:67145A76782;66$49578$4;$8"#4766#34<46#%4!6663%$4=68244=557!;58%44;6!7144955833;56144<$%6245;77544;:67345:66244;$7135$7%65:9;6!8#"4%56445#876645:76245!<56%$%49$6$34%6633;461$7!662#$34:56#!#49%6%5459$87544!766144:5!8##456#24;75"576344:6%344;8#9225;67"2$552%4;6##3"48%34;56734"<%67$$44;76145<%764$79%632;663247"8#76%44;67334<562"4:4$#%4;47%#$!58134;582$4="#7"%4$4$14;56#4<57234;66244;47$4;7!8%3;##6#2;56$%4867$33<46$%$!#6%$$;463$4;56%34$47#3:56$$4<6633:7677<56444<56244<7":44%<#:#8445<%96%4486713!<56224>6633<%57%9%%#4475%814476645<77542<66445;8$6%%4"57$4496"24;55134:93%5<671%4<56$34;%7#4:%7124<46%4;!6$$4<56647<7%6#34;56145;$6244;67435776435;65$34":5734:"7675;67324;"66%$4;5%34;565$7472"24$71"4"8%45651$4466$#4!$##35%32"461%"39%423%6%%!#6#"4457#34%7%%4%7%!466$$2457134662#"456%2%36%%4%62%4;9"22:7"6145-<55224;$5744;56145>68335<57344;561$4;57"24>67345;767$%%:$%644;!$67445:756$634:77645;:435;76547;76245;67%%946$%4>56134:57%349573"4;46$4;47234;5#3%:6"97!9<%6961!%56664<76444:6723<56344;#5765<5$56344<57%$4;46%$3$633#583%42:66435;"86145;67#4;"855$!45;47$44<5734;56243#"75572$44<"5764#$;4344:8:9"$495543;663!5<661%4<8%725$558%<;6$344:57"%$47555456124;56#%3:47#%;575;57663;57$24;563"4;4%7##4;47#4:$%7%34<56$34847143;5734;5"#%4;8#$235:87$%44;77855444;4"514%A4%23;563$4%!96"224<7#"$":66%44:56%44<6714<9$2%4<%$$4$7"#2;4745<76335:947324765%;56648"676!34<%6#34;57134;57%3#%!765%35966324#6#224;7$7%34486665;77444<565$34;561$562#4;4$614>%66343;663$4<67344>6"7%45>67245976759:76545<665"4:6:345;99#%3:47:$74941486734$;%64447985544976$5777!545-48444666333377643"%13/76235267644;6754579434;5643;67125;56!$%756435:664"82476#534;66#.24476442;565%$;46$%4:47$$3;45$4256$247471$357$34;56224:3#6#4:45$#$4;81"#4;45$%4%8$33;57$84"$3#6#$%;%7%"4956%:7!9%#3"9452$$4#$7%4:46234:65224;55235<76545>:74%:<863;<;$33"6<:244;:"%$%24;#6$45<57%!$6144";456334;75#3;565"%4<66244<66%44$3:55%3$4;4##%4;5%7#$:9$5%34;457356%3!23:"$4:3#!7$$23%$%3:461%34>6644;5675799%24"4"$$4;465%3;66043:4644<58237461$33>66243;57124":85565<;9"54#!$24$"53!1$44#$49%3243:8$4!!!4%62%4557$33:$%5$#5!461$$:737124:55224;47#29%7"$:572$3;472448562449363;47%99365$9914#6"55%2446"237%34/$%4"";;;>68%4$447;6@77:;99<:;!85564;9:!9;%?;9:$:77;776:=:A8#7:=;=:6$7$749#"#;744244;7$85$98;57875:::;:A7$A="5465%785:662;44566889:948=6:9@89<8685=985%8"73$56=:?7=/794="5;5!4%65#4#94%42481953$65<6;8;$;7:58673381%4%6669=$:6889574572<5;754/%82866%%3276"745363$45:774.83%8#7<5=857=6;%82<764/962274;568947;5=:87=7:5854566567":0<4=$8$8"699=7#46757589%466<5=7$>675;7?7997$5:A8:$78=7;3687:8835=::"#>=99A79:4764=6?9;9$25;2;:><897=384447;;768$6?"7==9:A6:9:;?#6<2<9@9?7;899;%2729<=:A6455=8?8<0;76%#$5"898>!!9;:553%77=7778:575%<9"8:6<9#8=58:=:@:$6899687#98772/46989$!""%!46:98=8:?::/998;:66$65<$38?:86":5795$6:6279987:#"""5:9:4334"79A>98<=7?74972!65898;=9712%%!8;6>5;98"$"<:;?A27:3:6>553379A;9$<$A:667<:;:6;8#868:A=;=59":9:9>8A=:8:?8;79#>85347?=94%344<9=#9A8=9576778%;;:;"498#::#9><:8"68:9:4::;93:":8#$!<:9:7::9#849!7;B=;A8<5;=:#<#<978#=667B859;874;<<9A>;=56?7=#7?:A857?=7:2!2%$.73;"86;B480%9:<9$<;$<:;7;A87%67$259<:9A;>=8689??#8;9%%5>=9;:;9<9:6:967!$;43379;8==9;98:;9999A::;;98":76#6@:5967$6$857:3;>!776745898A98669#7D<786%47:=:?:6%99>;58$A96979:49%347;77=?=769$96%#<=;66:%8748A:"#96;84:8;7799%:%78%##7:9469897B24"6@9:886::?#99A7#"$37:<9%;9<;95#99?#A978#9:8%8<:8#33%>88!=48#4:#9<75988888975;7#37::><%978%%;!A79A8$54<%"3-4:99%=75:6944:8>;2%9669899679:8"8%87878>5:>9944>=88A:9;969966:96?68"9:67$889943"398$78368834:579$;9788<9886749:8$65;8364568!;6"5:7"9<899A963;<:8;;99=<88;7:7:7999659979#:;:!87!"$%%224=?56#78797>867:%957;15-6$!#!";$$78=:5977#%97;888:!5!856>6;>:>;98;9=89$<:A=65$59A""87:A9:9884;68958:A=9A7798;"6"=6=:%=669$A9"">;$:9:A:8?989;:9987>B98;9?77;77238;A87:888;84:<98:7$$37:$:A;7967;9<;93:A8234"<89;9><9"7";:9<76$8;46%:235$$726=8=;:7688875$%7859;9%666?79:68>99;?="6:=867:%87$89=::9":74995#!$>:#9?;9"79$";8A8>;:=:>9;55:"=>68<>7A8:#;968>644#85!$3;:66;84#<:1;;9=<;65477:74$4%3%=9D976#5?8:48@<>8899:<94777==?73=$#99:9?=888>4":A87=::=9$::<;#$?:87=6%=853;;=6%<98$4$"8?75047==9<;84%!<24;:";;<99498"!<<>8959=76:96<33"87<:9#9$;5<47%9#=89;?<:%#:54867=5988;9<$?8;668;8%55?:987$:A76=%68<;:<6857-:%=6$99%$8:#:5A9A8>;#3#334:96@%37;?69$57:>#5!$%334779;;A9?5>;8A!7>83$;?8=;A77"::677;;88>9::76;3437;B7867>=;?:=;9>756:66?<@:7:::85?9$9!;A;448<=97;62437;:797=:99:59:85497;:9%886;A99:879<88?%?#2%8987:A63%7"9887;2344<7;8E76<879!7@A;78A8986"838#:4656753>83<9?9::A=#8636;<877<<8=9;4@89#98763398895575?;888:67=;==86699:;:A7:;88=:88$43;$:=57:4332=?";<"?<=87:?7877;89353776?975?%4<;;A:<9"%8577;>;>7!"7B856;89>99!679";9:4;9:?8:97763<<:!:>$%4:434;B!8969867;888#=737;9$>8=:>99?76978"<79477;@;55976447$3698>=;<84;:%9977;:47@A#666626:93673:9<#:8333868886?82;:93;9<=858687786%69;%59<#A@7==%$7%?8>=$5:74:A6866;?9A:7:4=547859;#34><642:>9$9;:A;%8968432:><49=;A8;$333:;3;A<:9;:=94?;977:88A8%9<:A637:<97@9A=%>:<9A8:96?:7$:3:74;A44:9887478#6;5"=6=64<779#=86533689?4089::77;==:8::278988998A733:?:%86=73;::6?9$867;;:977:>:9748=95433$36:>=!674;9:A=:#460#!>98"$4:887;97A87:A=8%<99::8#564:/;83968%;=9":97329>;:9;<=998467$3;<24;79959;9A45537;?364:?%;9>;7"#332789:98:8$8>837=84$379?899:6?8=8989?<=737397$;96=:#7658.7;667%34986?!765;:;9:7#33=B<<378$8A6?:85:996368?3?:?9:57:<<:9:;36<87:8=:857;886;;86;%=85A876/;=:8#3:?9!:7:=98#96%773;;:>769:=8=674678#<%54;>%34:"6:8;>566?;8:?8:8#8:"=;8:34:<=;A=;:::A724;698%9#8556744!#3:7:7$6!985=$:>637>>$99#8"988:1##33:9!6;5:=96$888787%894269:"8834;87382!3;A8:954%47:8A858%39"29033:8:;76;?949=455:54==9"437;A7;73<#6767922789:=66:8$%4:8>:6:8834:68:;#337:==:66966485?7"%9<;8$;<5333757$%48:$689>;9>"8456%6676:8$;?3999<8<8;$6778:87899!?:<:23!67.:85:A8=8#53%269::A76:::8:7386:635856<8:6#6@8?67;=781<323485=6:194;88=;:97;>;7$;?$99;:598"4:$98#4#788865$8=#%699:;A9788"8:7A;<5768"8;=7499;96:6>9<78:;=89%79:<>966:76$8";868;:8"6555":8;?8<4=8"9??8=#89;9977=#336"%%%#3:<%99:4;6:94473#%3;?=#9<:A6<852:<:45:27:==:%;8=<46=9867<9;>;899736675@=:>;667439;933#3:337765876?:!4$5?5#%4:A6;=:6=;8>:%3:>8#;A9899%8>@8=8;7?=86:66#3775:9:;<88":;:955/6%84$#;9A%;%9>96<32#33!449:98"779896=:7;895498:76;;:<7799:6875:89:A>899<6=-5=:?:;6:76548;;A9;9A4=;:>:98879:8;966534;>98963334=?53$368;;?9#%#7?:#6<;#7<:>37$66<7<6;8;895!4;78:=;:9>=564"38877657249:57398;9999;#5?:<97:6=7998<;8867":7:7>59#6$34$"79@9%2$4:12$376>A699;:4344:?433:8496?8=;7<8>996334=4:86!:;:<9>8#6":;;?:9?795338C454465;926187;978B99$4994<;811">968;9:9683:83:42856577574<;67:7749=8887<979;9A;4398"99$9>985"8::36795%5"9>5$47:?:=<<99>6?959!9:8<57:<88;97=;97!/978::56"548:"!!79598%55:7#898:=766677988288%634=879:9!8679;9>="6<5435<76!:887;8;7587:82:?:=8;:A=397=:8"6=:?7;%=:8=:988"747867<:7627!68:963<7"8$;8:94468!6::=%=9977;7A<:993927<<#36<98A63:8B:168#8976>25;"7=285?75$7"98773:A7;:6:6"A9?;:<98#9?=889?849>;57"><:789;67;85"4348884;?<45?58398;:5469#5%66896$84=:>#@%664:9":;:9"7=6%34:=95<;6886:?;%789<<443:;#67795<<9<897768?"9@8=$5;9$;:47678<9868:866>;85"46#>::6;;<953;;97"858699<8>8$423:"87:5456:<987;12%388<6>%=:A98:7?99!#7788@6497:>63$238837988=8=8;8<%8:%=:9:=6=:?7;8776!?%76:5348;7#6475:1473:9:"7436;;889?7:3240958!35:62677865:8447475<4;76=:8<8765>7=9%A%94757=8?74<:88986<26:75<7A:":84765<7$#68A2:;969?;68?989149"%22:776#968?;?6468=8<:9$76:=8327:8#;875997:8>9>8%%3#48524:>9#9:9:@8%9:;A=7565<%:>928;<8896896598<898<6#888?876=6659#;=85;A9556#587!<96==/;9::4997379887:922"97:9"$7;#<97:;:89658$7%8?$$59$8!;036:?6:45:7%3:897789<7=38:523:8:A%6=7<7?9A6795:7%7?7?;:::98:7889%6553!7897887787%>76:95?889860565;:569@6%578C6659<68=94"687A9>$6$7;;C883<4:97699555$3889?85"6113:57859:89?:;4243#4;%99A:8#997:$:;"658:9975<9A7<8$79;>:;;"867:68$$6;99#8>9@$8%8898:887;6#23:?7533#<9=%3=<873@:7798@4<9:76$966$531!80589874<:8:96<87!6;8@45;@:84=1:856599578966:A442$46$24%99818=6::@9:3!8;87%"99887=78#8675!166?:7;986=65>9749997:579985"%7<49956967;#52346?:8$3:@7<7899;:9##6<67""7"8;::A7743;;586A7878?8=$;9464:9::;75@$"7#7::8773%6:=<:5785=5333659%5584#9#<:;;><66?466<<952388<@<$66#%$33663;46:8<=9%35$3236%$23;53=@>::946%$<:@:7989:;56$6A>:6799A;6"8%66655<:9=#4469;><86#<:974=;<<99:8;>%$23369:976<::%:;888:;8@976::$7<:63;>73>;88888#$:=;4=6386;88982888$:9$998?4=8!46";9=7:798:-727563:@;884368$9=8648A868386<:8728:<544989::458<=88A<3!9A::7899$369>768336864309;49%"34.6951!23=<2:42:=822;5:9;;:;968;A;95;8$E9:$62;:77##23":7:67822385#4234:477:737573854!82%2%6:<2329;::7489;88;::79;<69:568@79A44?6:6#?;96>;799@:46E$><:=784:$:==9;7866%73234$67#4<<97<:=6399;8943<6"$6556435;!8%89"89;:978$487975:966?9%2:8523233"23#$%23%6<$333::862;@9;7:7B:?979=875:;93:9<:8";9;3:993#@>;@53:757=77>;=%667#76<7635:?9<9<"8638;<:"796;9=<765249<8"4"9872333939:54#512498A%>8855<6:;84;89<7:7$24<45766#<#=8:87%238!=38#369;588$5$:9A;#7=;8;8@:795:@:A:65#7%:7989;99<3>933;:78974<<96;66#77:@95B:98;9:75;8;87%4:@;8=79%423<<8:9=799;;92#7!9;<7#A7:?848955<8"1863387?27"?%::A8:78$5325%64=5:77<=3<5667469$:89#:A=98A58:!59?:97;8">96=99A:97=229;967":665;73<7799@;79:<=:%796<8?#%:97:%56>596<78=8:45>=:7:>74"%88856;@9<78:99;4%24?895433;9!8>938%875779;9:"757679;75#88<99;4197:97E$:=436<8A77;98;#<666"=18>853<<;5$37588676377:::9647599%69:;973@@5::724<79;?<;29;668@;:;698:5862:@23<9;396$:87675>:9889<96;;=<9213390%89$!$6?788676%74;>5#>986"6891<:569>:7<:@559@7768=8447;<88#385:44$99B745>99877;8:6!52988:775<=885%799897;7732<;426?=6663;599765:$:46:99E6"4:;=7=98955"886848;:85;9A<:A:3893469:#68659!!"$78;9"65<9;7752%<:=8$=3468":>66365!957;7<:?77?;!59:::;76::438:%<96>:68:7;878977::797<=496"6259?;88=%$49:G68"6498>96=>$86%98745334936369A9796938;89"!54569:44#:!743:;6=;937;9495659093>7763!%?=:A;<3?65<35>7=8>6803:7977=9$235849%655$7<;<992776#!79A6#3$9A61>5$;:@55$;%838:59:A7?=964727:<4%2399/2:63376<;8<;=::#8988$78%4$8<864:@24"777:#7295$39@7:@76:69866%33>@6@76:8=8=88<638@77;B<73%535;>:;;;<9676>:9=#96:$7A;>9A=$7B<56;:;:7#9466#8#5=99533<<699:737<@896=9#7188>%995333%<742294?748!8:8:6<9#66746;:9<9:5544458:=<8#7;987:6699A:7<987:#1,7!6;84<$470669<:799@<554>#75768:$=5:>:36@6?946=79915463539%%%133<@93333%33=?8<<8:$233;853<7385?68$24<84"6;742449$;98$7";;<::73<97:==98;=7;%3#;,:96:9;748%98775:#3#9455<;9978;>8<666<<9$=;:49855#49::"9:383=7%#69;78949>8:>8<8%775<6=#77774979%;85#:97969497886@8=:":85;"84;7%25998#338879;88?8::64>8:3748863777@8;7799<7$7878587?9AA;?48;;;77$:;:6979563;6>:58:;:8;;763"87;436"?8654<4368534568<23322565:8763%4#5$<8:6$563356!79757:75=675;76;9$13349>784@72557"87;8445!9634755:754388@6329956;498656:$:893294B99;9:2:=8669$796><8=7<9#4996><>554>8#5444?66>!19855:%9;659682;>8>46%;8562585:6<76<1425><86@68;668:79;879=9@76;45$683=$239$7!26:8::2#=85$56>8323776!9;;8:57===98:474:7B8"42<;?477466;86;<;%38:698$"7<;7:578<:75:840#;3478;2#:24:::<;39:58:<9#=89:.8=775736#;4486;93!"588:33488@9;645778:3%88894736<:?5997%59=98%5%47986@:4::576;29@88?876;64/8:8966544774>:9<9775562$13::::644$7855$:6<3"62:9<$#4488:9>7=74<@94:?%796%872645489%69:657::59426<:98#5%8<<58:@?9;476569?898<><"46789<6@97A8;5::>3<75;;894129?<:?9@435/$5466#549<758!788957<:749::;37?%:8><559;:669@8824@7789;368$656558:68768"687423!9<74><=$2;976#465=:"55:<85:457::9=:478579>A:8>964579@7415:8<;%1474%67946:98?8";75245655$:%=<9!78$%?72;8>8;99:699:%78:7%1:<=5985;8%32<<86==87;"4"6355:"7697869#964%;99#56<;<:9<:9<478$4567:655;879$77"?;:<63=$3657945"%7687<6965674569454:3;;=8$89:5;689?5574"6425687!99367878885A9545746>8@746<99684:98663<;895857574:#3:48;5985344"799<=65"8!8:6#6;974439#6A<9667;79698=9746>99A74!867>9148@6;549;9;87653::8!983<:56?8#$256:9:9542<$7B85647;3376896"54>5:96>9@;5!67>4738:78:;:!"!#%44#315<::7999246895748#;874<89:5$4><<<996"%"%"9?;767;8389@67694%8995;=9$575159?<594:4"556976:$@;1=88<5;6032864:A=9;9:>::946622035;613#9:88>?<9;32:A#<;=77!<<$?;559A56@6;83%B<7:9#98<1438:8@849899?839?;.8::07>72954?997<33>8?:?;6435?;86$11$43;9A:<977$%##%$36;49/43;8;?$:78988<>9:96:88526!7>354;@;:6!=5586:><8:=:6:45$9@9776"7475897$779=833<<6<58""178@6$315<2458"63=324:8<33#3375067668;53$;9?;$9:>6%2#<@<8>3"6"4@;881876<858=67<5>48972>8:;163:;7#$#%4,$$"!468"4=65;99B7!;2=89-7;;8:A9%979#089$1986<:%76<37:52217159969>686<;85719>;<9#:277663:$47837$7%78;-##23:<214:%%3;<296=:88>;7456:A;87A5621;5%12:?6;:8:3%7@89877#39$441=>:%6%6986557"629788567;6"%69%289:533:6488<:$33=79;@7=:#<7:69<1057;:9:5#;"59 +S98%44B77%$4A77:256444>775!85A7654!:A976245A9%6444<7972%4=572!%A47144A66#74A675##4=672%4>$6#45>66344A67$$%4>939%#4>77!8334A66#34>66334A::@%35A86%75A:#"%!#76#4""$$34!<$7!9!!$!4"$!#!6$:4#67%6"34!::%#:#4"#$%63"""8<5##3#$$!8%$7$!4;"%!#!$!!"#49245A79645A978!;/>A889067A:%7;14$>D977!75A58%5.!;$$76244A5"$4>65324A67#%>67245%66%24>67244>%6%44A7734868"55/:>6775A$723A69$45A67134A658#%44A9977A7724>7%62"5A6864A%6#456!""%!"673:8;%!!!"!4628:3%5A769$.77345D866!77A88;!%87C77920>$886547>6#7$5%7/?879%87D79645%66%4 +S98$53?6754;87254=89444>773%4>78244;67813A8974/:=8$3>571508:%6144>762%%86%$34;67244>6575A77244=#58%.64>"8664"$9>767507:77%3=5651$#>6764>56244><244>76#44>676234<66#:8#54>%7675<$67$7$>8734A8%244=57344>47%$866244>9##%4>77544<;877$4:9887$74=$5%44<:56#4%6#:67""%3=3;#%%34>76734::8;64>662$4>6%$4=$6"$;47%%;47244>67324>682$4;=%324>%%244>679#184>673$4>6"2>678%4$54A79544>77:18%A77544>77$339556753:>7954#=7##44<468247975#4%>69144;6655768#84=773$4A59750#:A55508="57084A67%770:A88544A57344>67%44#=66764A86544>8779/74>796784A6775A79"44>76344A67244= +S:865%?7$7$4%=#8##$4>87%44A896#4=89344A7"344<77444<471%4=67:$#4>77234>:"?444A89144>76$44>7734$4>8514>862344>#57$%$#$=654$:%8>6723%34>572#34$967$#4>667$%4=55$44>69144;67144:771#4=861$4=693$44A9914A89644>8744%<9"$#57344>$47344>7624>69$44>57244>662#;779$595$4>7773=87544>="%@:/84>795%4A77544A67:.85A99%34"4<67544A99644>9965A97734A692"$%>%979%$4>772%4>#86344=79344A<%:3444>66344<6612:67234<76344A69647A;:9224<759$84=79$44:765%4>#6342>67344<56814/ +S79$%4<%7$45?57%$5>56$44>$7%4>4623=#7234;5%#348$%4#$7$#346$#$%55$#%466#%4468$44$54582%$351962%406624$77$4"805$1334>59544D79334>5654$$7$%4=46134;%8##$3A69545>77548571$44;6823$34A777##$4>5%6344A4613487344=7765>671%%;48334;76%%4=67%43>46#4>65#$6%34=67234>571%=46124>67$5#4%56%34=#%6$34>67$3;%$#$34<77544>8=$57244>57144#4##%6%44A99<%45A:98$33>68#%4=47144=48$%4;471#%4;57234=67244<663$4<57234=59%34;48$34>#$6%34>47##%<%6#$4<6"3458%%348%$2561#%4%%663$4::77234<$$24<59%44=57%%:87773>9675"54>566#633<46244>57$#4 +S78$54<567454A5955975%4>%7344;967244>:1$4<6944>6774867445A79444A77234>7967D79444=462;66235A7964=%73444A677#6$4=%7245<99565$66767$6$4;59%$%4A47%$#4=773$4<57"4<571%4;7$=%6#34A77$34;67$4>69635>69%4>6814A572344:7#33=7"77244>69%4=67345A$8544A67544<7954;9234;47%24;678644>8=744>9=%4:9:#45=5:#$4;6=344;98647A8=64A<=85>:=644=78647A<8345A:=547A9>544>5##4;:=75:#89344;<8347A87%45A9662A69544859544A66644A::#34;9;6#5A9=634A89647D9;74D%#95%4>495663>67#4A69635A%6145=66%44:69345=;569D77545A76545:794345>6764489$34>672A6654>7"#%4=48#%24<59"44=67%34A57244=7934 +S8:354<79445A%74%4A#=34>895#4;6914>:"$4A79$$4<5$$$4>56%45<77%%%;47$44>66$9$56##$$%4#81<56#4=%6%34>67144=67%44>674#4<%614>57%"34<48#%4=%7"%3>:86"4>#$6$44=76#%4<5%14=673%;$55#%;4$#3$56$#4;59244>57234>66#4<673%4<$%;%8"%=$6#2<46$4=57%34<#%6$34;69%34<%6$%73%4>79234>67134=56%#4=57144>67%4;66224A46$4A79344>69544=69#44A667$448$6?7963=7673%4A69144>:##2<6724>66556><671%$4>47$2$67$$4<67134=8#2=57134>461:$6#4=47"4<57134>462#$771228%%714>59144>79244=675##4;59$4%>59%24=66324>473%4>693$4A69344A79144>56%2<%6#%4;47%%4:%7$#:461$4>59244> +S;8354B89555?5744%55>:76344=68144;:76444>79$44=97$3=564#64;77"4A<234>876$449:$99344>76##>77244A69%3$3<792434=69545A5924487#$$7$4<$#4=67$44=57$%44<665%34A;234<676#4>67%#34>%6%#4=56$$4>69234>$%6%34>46"%4<66%#$67%44>%#7#%<%$1#4=$566$4>56%4"$67##$38#7$2$$$2<%47#5##:#357#$3>$3=86#4%>9>54";6;#;$7$34>92<"#3A69344D%73@643A67659124#$5A776#4%559%44=556##$%%7#%445$6##$4587#$A57%%4>69132#%9;$44=65$8##;"358#"3;#;14#:$7#$<%7#$7$;76$%%92244>66%44>#:5#24>675$4>4763<8$56#$44A?335<7;:.2$4>56$7%$%4>6$67%4;79144> +S9::$88A67$$64>5743=5634A68?#$$4A86$444A786434A49:.74A%720A:9534D666$%4A#$#$%$966;787A58535A96544A665$4>67445A762A$8767344A675450668#67D5;/87>"6675<561$>>779347>;79644D685#64A68$$4A55344A67344>57$4A77144>66%44A67134>4$134A78334>677#$34A47234;67544=66334>8%714>469184A66:/#$4A777245D6824#A97664A47544D99;464A8778/>A677$D9984A?9"44:#<79547A$49144$=%7345>7664A$56345>67784A67345D56547>66;0>A866424867244D77644>%822A5:7$44A673334A6655;46244A562A5<59144>572>45624A66"74A55244=5664;66:05$97724#4=5634$%7335A79145<957%$4;#$8#494659344>%66$577$7%$#$4=6##$%4$91%#562$4=57#7$34>6$1%9%6$34<55%$%4$#$654>767$#4%46$4>67%5#$4;6#5"4>%7$4;6"6$95633$73%;#967$61$4$7%$%4=4$6"3476144<466#44<#%#5>#7$44<"#6#%=56$2$#6#34=6$34767134=5775#334>786#%4A69$24;:68$$4#%65#:%#654;69344=4674<56#7$4;#$65"#%4>975#2%4>6924A6933>68776#334A57144>6555A7928%5$6#%4A67%44; +S97555;%674458244$5%#5%%%68%4;57144:9573#44:876%44879556:$$%$%#4<57%4;57134;#"8#4%6$4967134;57%%$3;57$24;6754;$57$$4967%%4:58$#4<%72$4;46%$;$6"4;56%4=66334:661#34>69%34;57139$"4;66%44;6714<%#6##<7724$57$$3:"6%34=69244;66%%4:67234<47%4<46##<57#4%456#4"46%7##4=5#%;%7#$4%6"71;56$24;572%%<4$334<%6#34;6634/6581%4;57$34;;772%4;%7%=472468%44;$467%6%#3;571%4;%72"846#%%4=47#4;:1$%4;$7134;#88:#4$47$%957$44>66%;66144<67244=68134:4614<57%4;979$<%61$4<57244;56134;57124<57#2=$71458$#$395564>67144<598#54=671%4< +:$:69$:9:893<:<987;49<:955:89$"68<9$":8:%6964889$##$3<:<9:6=#8987689<995<8;98#?:=:97<9=<998:;496<9:6$%6;496:#8#%459:97%<8<:9699;9#489:9849:676<9:96#68:9$66:97589;7%58:%$#9:;966=8:95$4#;9%99:943$9:966$:;99668:"%%9:9858$:7456898:9689;:86:9974897$%9983::897699:94!9%846:8;9399<697"8948%9=97589<7755:;98$9:7<8858=<9"::796"!98699<:96:9<:%89;956589:78$6$:<:96::=998;9;7$=8<:%:9$9::34979%78::97:9=996=9<%904":9<955"89957:9:9#"98<997978"8$:9<976789;98799839%986#9=995<8:75""988#::%9966:<:96:9<:9679;:%%S +48<:9#79;8"$9$6$3689;9%%6:6#"49:578%:<98$8:"98"86%49990"7%9678%%9<85$7$49$4646588<985"9:55#59"963#8<498%8<96$<95827;5577:96%$#%48536::95%8<996%:%<9:56=:7578:8755%!88%/4$#8;8"#3#""%765#386#%%3%#:57/69:95!%/%$6$!!#$695$699591;88=#4#0:9;955"#:%%6968%.5:#"8%64:8#86#%9:86/:97874.6984$5";995$0;:7:#505:$99$#%49:65/69#%%/$9;96%59#97#5%7#%7%9:78299<98069<981<9<:9%6:767;75?7"8"69<:9869<7#8549:9856"785549:48##9<9863#:67%4%9;<2:;%9:=:966%"963#9:98%$89<:9:%9<:97#9<:83%9<8$6#9"5%%%7#"95665%%:<::85:9@6%7:<89<# +<::967:8:9%458;:95::=987?:=997?:798:6866:@:9368976#:697:96997?:=798<9<98779<:95$:788@7<=<9;?:=<9:<7=:8:<:=997<:<68589<9892:<:;97$599<9%$96:#37876.4698"::95?8<<#:78:?:9;<::?<;4=597<8":?:95<7<96#9:<<98;:6::?:=98#<:=99$:9;443:6:=:9"89$:<:6:845<8=<97?:<97#:9<:97#7:?<9;9:?:9%?:<89::A4$9:97::=<95697;<:8?9:;:6:#:;S +<8<:9:<:::6:::95%98;:96<8<986987%7<:<987:9<997779<:93<97%:6?8<:9#?::98?8=#9699;:88<:<<96=::796:8=99:<9<:78?#9<95754#=:9:?9=:95749<:94;?:=<898?:=<9:?%<7><9;9;8;=<96;8676#<8;76<9<:97=8:986:;:97<:;:;<99:9679<749:987<9<:97::;99:<:@<9:=9>="<<799$::7$S +:8:84$56;7879$6"#686$#7:9:691$<8:684%876554!##:777:9997"77677<99:83<7;7#598$66#6"986;$845#;87488;8665:9755$97:8$9:57497:7$699985<8:5%"%:94#597#465$7#%$:9:9%$$$$###99:7%5%665$5975%$98$985648:90%36#:4$%#8:74"6964$396$986<8:974:%95%68$7:<#99#%%85$98$9%5:9:468;74$$4%$6%597976<7=9654;=875;;:848689#89%7:8$%%3:947$57799$4#%;=;98%26%=<48%;;=97#68::7%#;:=957<<=99%<=?:9:#<<:9478;;559885;6974:8;$636%797678::557589947::8%:9:557##$9976676:8:97#7;9:89;:$7569:9388;974:9:5%$:9:9$7#967%:8:986<9:983:88=994<=:889%9:798:.##$64?:97<#=4":54#4$68043#$%5$244$;=;974=7;>45>:9;@=52=8=?:78="=3=?:=69$<9:;=@78$=<=>8$A48??8=9>?A<;;:7<99795<451@8=64;6#:554$<@=::<=9<>478<::94754$1>?A487>=>;#9>4%7>7>97>#74885585<57>4#3$<;=3$A9:4%7<489480<60585%$9$85>?A4$>9;487897<97<5<5@$7759#9>444:$;4#9#9>485<7>975$9/>97589>?A769>?A5:873%<854:/9:>4$5<;:9>8>465:4<75<2##4575<=A>59=%<>8#>44#7#65:87:5>974<5>?>4$7>%8A;A:?$85<;759<7985<$;$787>?=789>?>9A4#9844#47/:87>7>47797>9A387:=>4#>4;9;7?5<;;=:7<5489:944$>46;4558:5393=9921<9;$74/<8:872;87:518:;:91<87/981;8;%=#94<9%7#92;8864.;7<:944#9<<95<9<"6:8$87511:$#77:98:95#67995$598:93%9896$9::966:9:9757####74!#%#"6$#$$"%%7$6799$##$###68"79:97$99:$398:986<8;:99#<9;:9%?895=968:=9%9<#695789876:9%##4697%#%8137:=#8<:93#8<9.$777?9=989@>%9=<9:@$:=<9:?:=<9/$#9<:98:::95<9;:5#9%;<796":9.%99:8%746459:457;#9;797<8<:987:9;0%%77":<:9558#4%%.4<:<::1:<79::53#489>;;S +87:77%88:675:8;998<:#953%87:4%#$79%98$;8$7#898799$94$78%95:8:$=993=%:;9556:8;#8585::99766=9645479:6$#;6986:<8:63649<87:$#"<9;;"9%<898%77#68:::8#<9;9767:#4:=%9::98=9%9$6489957$96#65<:8%%99:776:997:%8<8:963;"$9443:9<9%7#7:6$66#9;995?8;5$:=7:<97<7:7:4%365:8:787:98:7$5787%6:9;5$75:8%$%=:9::687=:>7=S +9:=<93<9:847:769=<:?<98?:;67#3?:<78:<88#:86<:=<98#9:7:987:=:91##<:<:9;=:=:70%?:$=:9:999#:86<9<:75:98;:9767:<:9%:::983?:<:954=9%99:67<8;:8#<8<9987$9<5488:8:9496;:6:85686"79#9%<8;9956:<:96;9=966#99?993:>9=69$?9=@"98987=6"%<:<:6;:8=96%8:8=<9:?9@<9;::=:73=9<97788=99%%6%#=:94?:@::<8:>:;;S +9:;84%66:99%99<:97??<87<8<98.$<<=:9:?:;<9/4#5=;=:953?$=:8/$04%<::<79::<69":9=:93:8969;6%:<:93=?9=983:7;%98?:<7534:9764#<9<95$<%;:8669;5%8=585#:794459:<:93?:<<77<:=97%7989:88;9<6:698<:9$?:=:888$<6#%4#:99997:::9#789<9$6995#89#"#49:586::<966<9<:87897797<:69434>79344>8$67.34=7#"4>9%77#$4>767"34>76#44$6%779074>669244>79245>%9%134<79244=7!;#44>79344>7767%<5=6<34!9>76723%4>67$44!7%44=!79%5%4>67144=!%68%%#3;%5##$4=667!<8>77245;88!891$4=67144967144>7965.65>?245#=777%9%9>699%9.9>67#%44=66234><#34<69244=5$7%44$:8895344=:89:175:999344>79%24>692#"4>69$%4=46#234<47$%4<67$"#%<##6%24=82=$34>67144$8>6713$=56124>79344=792""4<68$45<4:7344.9;%9$67%24=6713440<89:.74<56765%:>66954>57%#%4;876234<46$%%$4=67244>67244>";77245>446%87144"=9#8##2%4<67$35A7954>66343>66144;876#34>67345>777/74>#:!%44>93;1$4<57%44;876143<766%3$34>7$67%34<66244>56%67234<562$34;# +S9455?67244#4##%7%54A99;%45A;99$44>68#%4=47144=48$%4;471#%4;57244=67244<663$4<57244=59%34;48$34>#$6%44>47##%<%6#$4<6"3458%%348%$3561#%4%%664$4::77234=$$34=59%44=67%%:97773>9775"64>566#633<46244>57$#44A77347=8<144>67#$3#$4>86%614A%8%3"4>66$327%7$4A676:6723%4=673344#>7765>963%43A49545=69%44>5%7244$9#%4>6664>79;834A66145<99$7<83798%7<8672334=7676#$3:46%#%#4%7134#;6$#4;5724>#81%4=58%$##2"#71<46%2=57%4#58##4>563#4=49%34$5654<47243=%6#$4>665$%4A57144;66334<46#44<:59444;<5732499%50<589=:$$;8865527%453$$42$5#79%?:D??27>??39?@:7;8B?<6895555577857;7@?<9%76429<87?%4"B??=85;<98<;=694B@8<$45###:32879??9A6:73059;:.<;>;?65;=:997%7:98%?7.%507<3@=@6<5>@@#;=:;8<7<5845517557:2:5885<==;=<;65.85755<7$44$8187%$474<755;678582244:3<78%58==8:38@=7;<"44:/545:55415087#;6782==9$7723<7833280648<08>9985$$=98;;685:484376%456818;#4%7-88967$897:583541:7#43482<@=7<2$463;;<;%87<.$43<78885540$4:8465<4;;<968285855838796<7>:;580>=;<;7;5:#775855<38167<@=484#>=7?=#;=8$42:48752487588%>@@8<9@;@?=::@:=AB789?#7;7779957@:<@7>7?48<@;@@7>5:9;@99?:@?E=<=8:.846=98<@;8<#8>5?<7853>=95:#97:=;96<$>55%9BB@@9>7<@=;@<6:=>?8D?<>>9>7A77@88699BB?@6649=?8>5>=@8547>=@;96:1<=;985D?8577>25#%6964646#%%6=<;9>=8<#:;69?=>:6"$@;:;72@249987784"54%;#$$45#4-#####$#"$%%53877;<=56"?<3>;7769A#7A99?<;:26:9947>99<5<:=?=6959:;:<:;9$><:@?769:#9;569987/69;<<"B<7@@54<8<%9$=@=735:5$4@9D:9B@:@144#8:8?;:5D=:B855956#9$872#45<>@D?9>9?3A<87774:9;AB=@;98:>;A:>9>@;@=>: +9<:%5:8:95#$"78::97"::<8#788::5#:>;97:98699<48%5687<6965899$9":97%#"=98689:#94$$#"$458"854:9;878775!9%$#7"::995"98#95$#!6!%985$9<975<%$9<:76:8%964:=98587669959=757579::@=:9695998;9856#9696::<97679::96<9897658995$%87%5#::98679;9$9%#3%:$9:6588%$:9<:96=9=:47:8::9:688::73#::<:978:<%77$:<:55=997<;<9$=;=9758#9587%B;#939969777:<47"#=877=??97;7$9=:9768<:96:$#"7669<8779;4$879"::8;:98#:<97<98"9#:::87$99:96989:57<9<:96:99::96:88:966:8997":9:#4649:9%:669"974%::9$779:74:9;975";9<::%S +8=97368::9597<98%;;%:955<9;$7#"4"69;83:;:945;?:99:??977.!%:62%7::992$<9::6@#;9:986!96#"#$=:6:8%9579965#$%99;9798:7##867$$::94"7!84469<98769;4""5%;9%"!8::97%:88#5657:98379:996:8:9398998$;::757!7;944=9<:75=@;97::<:;S +$:84358:9399;9878:7%%7:95699684#:9;96%99:976<%;796:6997<:<:9698%;:976:<:79:;987:9;96::;:39987$#:9;:88:9:?:9769:9%79<:965:=:96:8=:97?9;:75:9;984:9;985<9<:9$98::8$8:998<9<:88<7<9588<5%:658<:<99!78645579876$:!8<977798$%9998$%8:9:76=S +9;946%9<<:79=<6:8:=:799:=9#8?:9:78;:66;9<98$668<:9:8=98%<9=:96?8<999=#9:997?8<93<:<256$<":;94"::::957#99436::99499$53897978<:7998=#8<:97?:=<47<:=:9799<666%989$=:8<98#68:975:=997:9;:96=9:99698;897658<::=8<7%6=:948;997?#9=:=89=9;<:=<4%?8=992$:9=<9:$?#=:;":8<:9:?:<<=68=6887=:9"<:<%69:2#7:789;59898::7:"?:9=:==:=<97%#8<9699:997?9;:669;::$75?:;86%8=:97#:$9975#769#9=:96:9#679A446989%764:;99:=A:69#:4:<<98;<99959997#9"7?9988<9=9:"7$44$4;479595::=99;99<99#9:=:99:<87?464<7767965:5:>79;8;S +9:985<:;:97<=8:98376"95439##74#;::976$9:9539%6975=9:667#698560#9896699%:759964$3:9$965:979%398"#78%94#:::8$3887#69:6$#%5%#:99966:9550986677543::897%<9:5%$$#44578:94:$:94$:$6864;$96$8:;%% +S#$5<7$5?67244=46$5A;444A77%%<564A79245A%7%"4A574>%96$4"9%<<#$4=4665>5%244>57134<8#9$44">772444=77$4=6344>7$344=9#34=:#65>57%45A763%75>9%82%5=67234A73%4A672%4=66244>%7$34A6775>67234>672$4<5%%4=698=5752A57#44>9=9249<344A56144A47645D77347AB67644A95$44>776%=%6#:69:/7%A772;%$396614>8#344<7734>6$%44A4%%:$441;9244A56144A6344A66345=95$28%7#%;#"6%=524<67$4>661%4>8"$4>%%7144>5%30<<%61$=46#4<6834A5654;:779%"A661>6 +#%6>%"55A8774A564#5=$5354A471"%;57%4<%44#8A557#4;57%44=$$57>57%34A68635>475%34A47!%4A6633A66":$7$"$47%%45367A67%34A47#%=7724866#44>781%4>67#44:55:2344A4#1>#7%##974%4;91$4%%7%!%%$%$55#4;$6#4%"5#%4=75"$4>5744;7144>56$44;573$4>6""#%8$52#$#!3>%822=6$43>6#44A58#33>57#3:$8"3;98"34>57%4>5814>56$4>%7$4%<556#39#14>6724A!62%>472A66%34586%44$6"3;%66$4$:4#%4%724>55144#=#$$%=$7557133;847"4A66%%748%3;%6##4#<68144:68%4=7$73#4="8#"4;6%5514#A66544>$8#4;$144>6#>%6$#;$5$%46%34A562$4>$7$$%=5%%>46244956#4A7344A85#2$>48#34> +###>%8:3$=67$439%77##395%#"45#94$$"%;%7##:4$7""9#62%$"$#8%#$:4714=57%43;485$:%7#367%613;#%71%$4"86"$3<$52#43;857#34%%%%6388:5%44#5#%44#:56$%$86$:$#3:87"#3<7%%%"#%$$%%4;%618#7%##;#5##%8$7$#%48%343:##"$"612;556%1$;#6##34;65%$#287:444="66#34>43%%:%#$3#;7 +#$6<57735A667$44A7954A77344<77%44A479145D77:135>#978644A799%34A67$#%$$"#%##$"4>763$$4>47744>6A9#444A95$4=%#6$6144>99/3%$34$@69647A79245A?1444A%75#$4>56$3A79:96##%4>66$44:67344<79557A89334A66%4#8#77856$4=56$44>66%#4=58135A77##%23;6%7##$;47$4A67144A%#%5:054>657"45A465%34A7#860%4>$771$4A57344>56 +$66B897"$4=69458D87645D798.64A7"72%5>=77143<$##74#%4A#3==?245:591455:6%$4>7#7#6$$%5##3<661$%%:966#$4=65761$4#.$%65#4;4%#"#;47%$34>66###4>4%62$%4=56#%85$7$34>47723%6$#%%<5#"4=57%3>66134<58#%;%#%4>$66#847##4=$6$#4;6724497%44>69324A56$4/8A:996714;756$34;97#24=47144A776%3A966144A656#$4A672$%4<6##$8"$;7$6$#$4967134A66%2%4A;792%4<671487785;3A671%4A696$74A77%434A#76244>6712%<67$4<:9#%<593734>%83343A69$45878%6$7944#:;=5;$4>7"<3%4>67#$3>$<#=978673234>%9$324>696#4;693#3A563324>691"#89$661%>46#$4;%5A6924>591$4>47#3%56$:565#%< +473<8<46%#:9<975<:<995%99986:8<996;9;64#69;997::<"98%5B=:98:9=9999>$794$=?994"79=:95%<9==:9:;=?:96;?B9#::99;:99#==@97:7%$8943:>"=:95%?999?9=<6%4#?:%=%;9<<;$>5:8<<96?9=<95?9::536%9:9"25="=:9;679:;78:64:<998""88<$$8;"6:=?:87#9>4$89:<8=%9%:"78;:98??:<"983%%;:8679<%:899%$$4:%%%$<9<#7#4:57$88:6#79;98:?:>;98" +9%6:=:%?#9;;8<:9569:<55:0$<9:2$8<8;:8$#69;:;?68>"7@6%#8=99:74:<;<@:9$=9844;?9=997?9:883<=?999;:=#:97<>"@797<:$:87?8;:857<>6<99866:=%96:;99$#79:95$497:6#668666%%446#7$%6$%667$$99;48"36??5:57#99<:966:965=9<@?9::;99%789=9;:96$6"8:9;975"99<96=969=?:=<9#:=%5477%5%8<97::$74=97#<9:95%9:966=9#:989987%4<$#:;;;$ +$%<#8$"5?56#5?66$44;8!8#34#887#44>6624>77145=:66147>:86:615=977##3$;57144A6664>7$14>%7$44A77%%4;6%4>76144;68%>581%;%"!"681$4>$57"%=9#3=46"$"<57$$4577:5844"844A:96344>774%4>6$%;67$4$57145A66"4>67544A6614288#%;56%48968$45=:47$44;762$4<573;67%34>474#4=66$33;7%4>5713>"75#$#%7#!3=45$"%;##81!9%%":58%%4<%%$:47%$%=47"3<6635<61%4A66%34;$6$%34A9%7%44=9"24=672>%813>56#4;%7#24>665=7"%:$8%4<56646;57$$4>57$$4;57%3<46# +$7B9%;%54A89:.64A977144>8%7$%4A66$344A69444A778$64A7%724!8A77344A672%4#6$234<8%8%34A;972$4A673%4A:$;$44A6"7%44A4#624%3A$8134A69244A67144;57234A662%5>673.%$>46#$$;57$!3<57%3A67%43>67244A67"4A67%44>$7$$4A67%44>9#"614>69%4#4A79:.!64A6555/:A67%!6%$"5734.$9A557$$44A#561#$4>6$!6$3%4>%#6244A46144=$765/876714!5>9$6344A<$%;##%5A6:9245A99545A89144A9"<65%%"3A678.55A772487A6754A671$4A66124>4$8$4=8$;#%4>"7$$49#8$#4<$#<46#$4=%$!4<5634A562$4>56%4>461$4>58144A57#$4A661%48661!549661#42?"73%4A5634$:>67#%$9A667 +S9D:;;298>:@7;3"49795%4A79159669245>99447A7983A::@#50775175A9734"A89623A6925A:%72#85A97754"%4>;56:%85A#79%45A99547A4<$@5<387A79:0$769345A4934%4A7954$A796%47A89647A99:$#9#:A<$?2435>$99;44634A?544>9#?79244/9$=#=$4"#4;7#75.!!8>"56%44<7964448669%%35A796%A667547A77244867$%#%5>#;58"%7>""4569447A796871>A4;96#.<=86534A69%72#A9%6$"33#=:893%"<%"?;0>2$>A7915135A79245A7%613%24;695#5A89:$#8-77<6<87%!4>657!$44<49345A9$68/#45A8987 +9:67$#54436#8%$6%89869869$69#898668%96#99;996999:7649%;:8679%996;766%7%95%89::8659<96566;99799;9688%8799<995;::9%$9;:5469%64:%$:99%:9<:9:;:<:941?:<<5:74:=:9$59<:989:=:98::::3;9$973$8;99568<:9%=9:96#:9;:66<:<:95;8<:76:9<986:7#996799937::977:9;:7%<9:97#4#:996:"9996=:;996:8;:9589<:9:;:=<9;779<:97:8:97$36$:96#;%<::5?:%<:8?:><9:%><# +S$77%45=883164A67344A67434%87$$%4=78$$035:7971A%7144=65#.:A7774;9:%444>8!9"#4>566#$44A966134A:%76$%8A$#2A9877414:A67248$7344>85"$4A5#4!5!8064A56234>56554=6%5%44>8$#/988:3967$$%4A58$44:#54A662$4>#8144A65%409A99$%.9A77%4$!%=45!%$56$44768A$7%75$4>77834;6366248#6"A6#%42<#65096=47#24=#5#"4$48":"5$"$6%3$5#46#$$4"6"$#4%"346723:$%36#4#956"%4>491$4<66$4>#83"5>49681%4>#66351462%4""7"#4=77:%44>57%865$%">672%3A662%<$62$44A489234>47";75244;%71%3>%71!4768$4 +%:879887%88%$#$9;97$46$9#6$9%96#5#6<986<6<98%=:96993#<68::%.1<:;#94$8#<7;997<875569:5=<%6;76567::5%47#98<:98<>%9957<8%966:86%$#9#954599::9%7=7$:8%<8;673<:;:%;::8<94:<8:997"9>79?6:965:98:498;#:<<7#6?:$<9:?9;9%$87;992"?9=7#978::<7#64$<9;:%96;589;@9::96<96?=#=<88<9;9#88$2849998#48:96659;:;?99:935%<49085$698:5:%5<9=7698<98697?8=<97:9<595:8;99#?9:9944?9<:997%9:977=9<:96;99:8%<$<:457#=#<957#8;:9$%9:%$#987<8;86$149:997<8;69."998%;998<$92388%8<<#<:9.$<=#;945<8;68$%$5>765445?;;%6444>79544>79444:97544#=775"4>56$34>66549$#54>45";57543A8674A:7444=565$4=;%%234>92449$75#34>?:#;444<%?77#44>676$4>78%:565#4=%#%>79544"=867665A79$44A87544A8954487773>77344;%554";6763=66644>6"$%6"3A:#75A87:5%4>47$:613$4=65$5"6=676"4>77%44>6792=4$34>6757 +S955?87$#4;77"7#%5>=8%7#4A89%45>99554;879444>79444A"$6623;8964>89154A99634>782%4=46244>48774<:<7%7344<<<:%4479==9%#31$7665/6>778:%4;;169:?:;:<344;77$4%?8$6$44<7999:A<>?=5=:977=$?4$44977#3#9$$4;778787;69734$58A$7=#343>576544%<77:=779<:66=<74<97$:73638383728683#8:5$7$:97234=%%1441:5%#%4%57$3##"3%56$34#:24#<33>695$4>76:.34;765357867$$5A<6;544A79544=67#3=$7%6734%4>67%$4>$674A7964%69344=%7%4%4:963%4>46544A87%44>673%4;66834>67%44%?77?625%#?598$4A76345=<:789643>97#%2 +$66B77#3%68$%5A775#5=59%44>56%#4>671%5=6%%515724A573$%5D77:377>8;547/=;99545A78=144A86$4A59334<6%>78$##$=49144<56#$:24=4#61%3A79%45A69%44A77545A793434;=76#$#%>786;8348$559D77245A793%4967$0445A77144>7#%#$4=5"%$4A$6244=67$4:47%3>668#7#4A79445A61$748%$;#4$<3%3;166144>67244;65$3<47%34A67344>77%7$34A56$344%49234A996344><14;%%4=4;6679%:A79344;96124A67%34>47$4<47$34<5622=5515$=58$%4>57%#%<6$57%$4>#6##34=46$##4=776$3=56%$4>66%#"94714=56#$;46%$;"6%4>673#4=48%34>67$34<30 +$7<#7#55?%#%83#5>##8$45A89$.9#9;?67$#$4$949$;444A6773#5$$4>79544>5244A%75##4<77$4334=67/9%=577%#44A79840<:6"1"$4>%7"8%%6#$$4>%76406D873:."##>:%?2335<67%4A7%59.:$8>4758/6A467#34A5667##%$4A67%%18>671"$4=6%88%7#%4><%=$4#%#4A76##;$#56##34A559:.44A46713$A6#6244=#%###;57144;5%7%$#4;46$%$:A4;6687669$#44=#%6234>767%%#4>672####$#$:662$34>5%%"4A;89234>671$44><6;540%4A56$$4A69%449A66$4<%%##"56$4#%7$%476%4.6<4#%#4>8#@24#471440?%7$$4;897%34A8 +358;9736$"68$#5$7:97488$##897%:8%6;55$88$$98$986588%9:95%59%<99678$8:5#:8889578%9767658:93<8;:599%9::94%###95%9$98%:88;766$#879::6%568992#5:6;9%:78#%$6#%;97379%%%:99#99$:87864$#99;9577%$#6:8"76<7<:8:979;967%98<9871#55877775#4%8"$6:4#%#7#%#938978;98$7#67;#97<79<9577"$$8:965%%88678368:$369;#847%#995<87%8=9749$;7657$%:9%<:;9979:;98#4;$76%;$9$#;86::789/#<$%#$898#%6%#:96697:994979#$$7:98$7678=:9#74:#<996<8;75$8$774678;:57%:9<:8%:9%$6#$%#:8#6##856":779:8#6"797#<9$%598;973=:9%8=68<97;9$378#7;955$#7$<9::87$$%:659978$#7#$:75#%987697;9<#98698#%7:7994$58#$##4#%#8%88$""7%7;:0#;%9#965:$7$879699%?89?99:669:967$6659#448"$$936/$53;:9799=95$<9997%9$96669#96569%;#98$%88#767=48#$#76%7$$:$9%4759$;97669<#$ +%?695352;";58%35>67345=66$4>67344A6644A7653>$6=%44A4775%:9872495$1A$7244A76244;57#34A79545A776%$3=6145#<14>6614<7#54$?934796244;%7%44;4734;466344>671$4A%%1#4;6655A563%4A69544A6%144A8634#A67544A67#34;575$4:%35=563#44>66%44A77%#$#=%61<48%$4=56$%4>47$2>%6244<562$4<75#$4>58%3;47244A%66144=$6144A9344A586%>47344A69545A637$6#4>563476224A57144A657%5>58#8%$::54>75##%>6615A:59;65A9677?65##24A78545>8:?244>66%4%66335A66544=67$4<#6776?97334=67$4A56$44> +6<:9;8:?994:9:<66#79;:#9=<::76369999768<97779=:76659:95<8<898?8;:%89$%##48:<:93<8:75%588<998?:=983"<9<:68$##%9<99"#5#9<:9:<:=;697#<96%58<:98#:;4##89;956:9%6%$49;4975=<8<78558<987697=797<:77::9=1#%6899<98$4:<$9869:988#:%97:;76:<:5:8:97::9673:89?:942<9=7=7:7$:<:64?76?@%%66%55$##9#9856%$98677$9=:9679<998:7#9;97368;993%#59<:87"$##=$:7%"$99675$8;9%:8:7757$$:;89:74:<97%$"%:<9957%8=99::9<0%3%##9;7985$:<997#%9;:5465#99873%%9$:7757"9;455$#8<:5$"<99248#:%:74%:;7$" +SC98555A6755588344A715#:A$644"6>6144;976#4$74>7674A:8643?772%3>7#:#55>7924=4$%#3=#7134>4634>%7562%4>6664#8>6%#3"#34>7664>45"$"4:75523>#75%1#4A57$#33>87345>77334>67$3866444A8764<6714.%$<6%%34>48#4%>57$34>6774$?675445=:%45;=954>662%3<47144>:64;4#%14>767#:#:57#44A<;7344 +SC87554?8%:052A%75A875>47%%#;8"$"4A7%74#>662;4%34>#6344"8=766"34>"$73>7"331:/=7774=568#3"=#$$34<7#1$>#%67#4A8754A9%74#:#3=#6#9%4>65$4>#$%"#65"=$##4>476@9A;56$/<%?7%4%;$6344>57$485624#=88674;9%8085A6835A6%:.3570<624=%444A6617$6A8774A95:0%74A9%9A7777/79345D76244$<4%9344A779/:088473343A79547D669:/64>#:9#%4%<672%4A5613A79:175A57%44>466234>47:087A7;;177A69#%434>67%$2:?97544>8985/:>#75389A7:645A89544=$7#4=69544>663%##9>763<334>6$%8144>9%?6647A79649:877545A691034:8:5%8#4364>67637>86:/<;4::6435A96;28>5795479795:2..>D:;854%=895%4=47"9$%50>D7:913$"3$57124;#6"3;5714<%#7$24>6724A669174>4734%7A679054A?8065=663%5:677#:467#44A79444D79344=:57;585A66%144>6%9:%84>9;374>D<65673$47D66694"%4>69%47D:%=55$:#?87445=95@341:A76245<%67224A79770>A7934$475$44A696 +9;:9$9:95%39"9877;98:3:9<97$879:":69;:856<:76?7=<>:;:8$46:$:956<9<75;89":6<:=<96<9=<89:?98!9#?9;<48;9<99:=?:%;7:7;%9=995=69::93<9"674##"489:?<:7<:<:95:::<:9859;9$9969679979:<99=<::<98;@;:99$:9:9."=79::%<897:%;7"99%14:8!$$%8:995:6;=!#99:984:=#;:98;#%848;%:4%#:87799<767# +#79$#%%496##7:;94#"";;:$993$9857368::8%$99<:;7$8$#%%889$9579::6#7$#99=96%4999$5::6"#9:95$9#647%:;95%#$977:8$556:9%95":8676$:75%%#698:498$96$6=$%=:9$%8$:#378<96%:97:8##$##%995##5:7<:0$4:<:76:#=#8=967699#95$7<4"#<7%#=97559$:55####;:87788#%9<9855:9$9#9%86#%#$##%;:86%9::%69;9767:S +S6=9948D9;85$%A7944%4=:5%%5$%58"$4;4644>67"4=47#%4<%8%4>668%$4>661%4A66547A7965%884934>76144A461$4;$742=5723857%4>##7138567$3$%>59244>898#<;969545A69544A96345A895345>96:%7$A67344A:6679#34A6734#A77545<66;/%4#?78$%4A;:72424#?68144A6#<244A469245A59#44A79344A9957$9A77145A:3;54/8A699.%44%7@144;89934486933>56%8542A66145D77344A6$725/66245;499/%4A79345A69%4499$824A668%44$?67345D67144A%9334A8:087A$9#5#%#4A669234>9:75#5A77245D:8267D78623=757$44A%40<%44A<6 +S%:5545A9;54#A;693%4A;89544A47%44A791334>79444A%71%4>:7@544A67534>79%4#:A7665%4=9%43>79144A;145D79545>779$64>6:445>7;8%4>778$44A887%44>$$$9$%;46$9344$$%$<$%6$$#48""5%$7<%65$$%84""#%2#:$#$58$#"3:59$35A#=:9445A76:534:468#$4>$%962449656%#$44=%66"4#42>472324A77%;%7445A577#42<#7391:696$45=6"975$54>6#"4:8:;587>667$4457=56:35077%"977247>4869#7397%<"<69809:465%44A69134>47344=%732#>A:;;%4A799/585D559144;49224<671;%$24>67%44=:87244>#%81$4A69%44A6724$4A791#4>48##%A795344:99$47A=:77$##%4>:#%43%9?87$<47A7;644D8:3#5A97$$%4#$;6;545<79334>89%44A4;245;77#8$5$?666#5>598724#7=:4:345<66652495693$%7A899%44#95#6#414>562-%$A66$#3>?90?8$474>9#$;:234>75144996$$24>$561$%4<=#$792444#3$#6;%533%$456%5%#445%61$%%"# +"%:4"#$8;86%8"<85398:958:8;95$#686766$9<:87558:657%595399$54$8#::$8%%6997869:76$9$69$04678%#$9$#7%6#9$4%496%6:$95%4%78:7%##$8$7%;896%9:9764:977%9857%$:;9948<99789;7%$758;:8574:#%4%569$#459::95678996%6:7#%7::99$59964":89;9#68";9:8567#<97;96$$9:9546#97;9765%7"7$74%#4%$5755%89#54$"$$"98;6799;4967968:95$#999:766%9:895:9;986997#%4##":8;:868::97599%7::69;8:98#7<:;96$#89:966799;977<9<97:$:899"88:86%9996$%89$$656:978"874:::976#:99#$ +9994?:=:97<9#::9969:9:89/$<9=:422?9=5#?99:?:$$<95;>6:9869979##496<96%;8=56475;8$%$;4<4:472@$9%>5A9666<:<%9#1;?:<<9;749<::$#9%63:9;:87=97865:$<6%5798=996<9:7$76%9":6437#6998<99?<75%:?:?<48?::9678%<5856::84$#7;$7#843:9#983:9:76$#649<95688;:;<98:985;:#;964$9<;97$:<;S +S:8454E67455?6755A47835A87$45A672$4A4#4=#7#:#44>%65;47%44>$7%4>5%%<%##4>671%4>$7#3>"75$4=561445D8674D667544A682>%8544;67245A66544A6564>%5$4A%%144;57145A573334;565:46#%44>6665A772>5%%44>6%%4A6657A77345A5714$7"#34>$634A$5234%##6%$4=471%4;56#%3<57%<%%14>66144A57334>%68$44>67344>$1>48%$4>42%A86";48144D87225;876224A6864A6#51>56%45>6653;%8%44>661:56144>46#24A66224A5533A454#4A5%7%$4A4624>$6144>67245>5$144848244A561%#4>46%#4;$6#%4;7244=56%4 +97573%$:84$$%5$7#%86555;9:;"6$:=92"8:<7:9;7:;99$#%9=:96<8;68<8;6#"$;9<687%<::%$$<<#=:9%4$$:87$<98=:911:$:=545:8<:=:76<:9?:989:=596:9=:73<:8489@?:=79:?:=998?:=:539:;:9$<::5#7=:8=996::;9%3?9<9969:8568=988"::=998<:=88%1::<:83<9<9947<:=?#9398;03597<:=:9%<9<9586;7=8;95?9<46<7%<6#7:%9855%9723$<:=998?:=59997978597::<:8%;9:7755:;7768<99:?=%<9456$:<79%69986.%;9<:97?8;846<9;796@#9;7453%::<9:78:99#6"9::$/#9:<:;!5$;%#66%"1:8;9579"96#%:8787$%9=99:8:67#759##%36<::<:775?9?369;:7578897397:566:=:9:5::<989:::67!:996$<9667!994366"=97%7$98%9::6:%9<:736:;8#86578"69::96:97:99:75<:988::<;:68$$5:9998796:9"679:96#559:$:9:79::68:;<6%9;;::S +S>%$$45:%7245$=46#$385$3:48144>77458:678#49:56244;57#%9##14>996244=562"#4$%61%%<%57%5%8#$4:451#488#6#$3#9%%$4<%6$4=%7#349$#3#%$=66%%%%7#$3:5534$:$"$2$#%4;476$64=46$%846# +S7>67%%5#=77###57144<:%744#4:57#4#>7%1%%#7%44>6980%64>66$45$#4;#8%"34=566634<764$4=56#44=67#39%7144>$79.%4>67$44><998%44>5662448:5734%47%44=46234>773#74>6755.95?952%$4<46$44>6$61$4<7%144=661334=%6724708A8664%=661#43>65#$4>57%34.%814#8<846544A"962244<57%34;$$6#24>676#4$=667144>#764#74:5$724#5#814;6954A76353>67$#34/$>#77543>581%34$6$%$6#$3457%%%#7824%=####5$34=6%44<47"24$;#66144>67::=?764%5<7954/8>6689%7%3<55%$23:767707%94#7#%3=%66$#44<848%34<9#97653758%4#8A67244>76#$3<$%7$3%=$$ +S9%55;776$5946":#34;8:=148:66%4;67435>77%44>9#4;#7244%9$56#4>67434<4633<$56548775:;864<875714;89@444A878144$$8774%4A:=65>46$134;47$34;57$2;47$%4:6575=%6#8%7#4<:$3465=1$34<#%7%764;%4;4#665%34=47:;$;%%4=B;#:6$:8946144;57#4%:#"7#44:572$44;58%4#47234;#7%34489;934:$673<:55%44>:6<245A75=344<9=7;237A::7;>;785<5617<699%0%67%45>%835"767444<56 +S<;/4#9A87445A5755588244A715#:A$644"6>6144;976#4$74>7674A:9642?772%4>7#:#55>7924=4$%#3=#7143>4634>%7562%4>6664#8>6%#4"#34>8664>45"$"4:74523>#75%1#4A57$#33>77344>77434>67$3766344A7764<6714.%$=6%%44>48#4%>57$34>6774$?775445=:%45:=954>662%3<48144>:64;4#%14>767#:#:57#44A=;74443A4879$:><375%2%$=%8"867%#%4886#614>66$%<=:$?6673>$85$4%7;6=$6#55#%$5%/%8$7#.;%#"6##.#".#%$$"$#7#4"7768A6$3$4>57##4A9;%44>$<%3%3>%8;4$#4<6"$4;995783>#614>$$56123#<:%44;$4>412%:6654>$534>4722$;55234%554>66#"4>#78$?58$4>;;"6#2$;556#>56344>56$%>6534">662#=572334>5%:.%5A7654A77:/56$;6$44#8A77234A5674A7$344>663%4A66$54A6654#:63>4773"84A8734A868216>8";"756:#:>7"$""6A7674/6668#>:$93%4A8%4A867:A869@#4>5%6#6344$876634%4$>82%756<7765"4>66344>6744>6764>:1$6%#>#7:/#4A56%8#>37633A<24>56#3#8;4;56:$52;>"7$24>46#4A%8$4>67%44#=73A67$44<$$$%A#771324;57#8#6<65;$76242>"9$83:%78#34>5$42:977:#9A66234:942:8787#:77"5A6$244<#6"$:5%:#3>65243#?#77#50::8#88755#8>5$"34>$78A4424>%64%8$5<44$#449#$;956"4$;47"35A#662">6764A55424A566345%?769%4>9#2224>77%;244A$7344>67579$54>669$%#>8774>5754<58"%9:"24>9244A6664440?6#=35%:A69444>$69=7>#77144>638124;%814"<337349$$6344>576:2$=8A9$44>:9-8>5524<56244>67%44>7344=<46144;57#$%>4#13;548$75#24>6"#4$76$2#54;9659/8A666244;$653>5"6#A56$0?6$9343A$355:%856%4#>5564>#5:64@<46$3957>#78;6"$9757%:#8<46%6775244;575#"2>$6$%4>$/:466$%2A775745?77#4;666%4A788;::86344>45654>7654<763%4>56"3394%"#%%3%$;5564>6:-6A6674A4634>5##133>"75$33.%%<#6#34>5714>5$4>$6144>5%144;55$3%<"%2;464$4>:6184>6"4"#<4757%:#6%5%4#2$%;#$"$3%;##71%2>455#34>$65%232<$$555#>5%%4"%$6#"#%6<$9""7A"#%4<51"14<##5%3564;5563%%4888614:8#9575654A%173$2>56#4A"%344A%65$53A$66#.:A57%2A56#8#4>41$"4"?46#:244>6%3$4;55#654>8%5$#.4965%##8>#2#32<%#%9<%#$5#4:4;%#6234>47$7%8>%%%44$9;513%46344>88913775744;$568/44<46564>4$44>%6$984%6%3A6673A5664<856%3>46134>6654>46232>67#9447461$2/:>6$"4@%5$4>$%$4>456#24%68#8>434;4$7:#?7;8"$32>552@:>56%;$4A57"44>4$#44%;<47#86#$3>462%#8A56234>4%12>$7%4>5%34>$$##";%66>4%#34<5$749<7$7424765714"::"$#4>76#20%;56%#8A87:#74:@664>4$%$4>$$9=>876%:A4443A8677/?:"$6"%34<68%33<6#32>6924<8$34>6"$4>6%%243?%5554A7744<562"4>:8$8;9"1>4$933#47:%%8@65342>7%:/41A%64A774>46%%#;7"$"4@7%64#>562:4%24>#6344"7=656"34>"$63>6"320:/=7674<557#2"=#$$23;6#1$>#%66#4A7643A8%64#:#2<#5#9%4>54$4>#$%"#54"<$##4>366?9A;56$.<%>6%4%:$6333>57$375624#<87674:8%8/84A5734@6%:-2560;524=%444A6617$5@8663A85:/%64@8%9@6767.:0/564@9$3%<9#="8#8848A5#4%#>7$#3#24<676344>5$;$4389#44#:>6$:%=.:A56643>464;7>$#543#98@65@477:663/=4""14@46624>8%9143>7754%<2?6661-74@365#4>5%#671#44>5%#8$54>4%%34>#7767-46%#8@55123>5$07#2<56$%4>#5644>4733>%>568.96>5754>:8734A481.44@66:95"9>36"134$546%>5$4#3;9$4$6;64$%33<#7"#:%44A7744>563124>47:#6"76#54;8#%34<4#34>4548$9;974>5%4$755%4854634469$564<:474$"8>5%5#2>67#$4"=46"%3>$7#238665%>84#23>%61%>7$3$>52"@%6#42;<52:95557%53>654>563>37""6<4%7:1%3>$%33>76#234>5664>6234>8"#34@7644@76223><5$0>$#13>5643=>45#4;"?#47$4>6#44#;65#;7669673339734<46$44>$4$:1<7>86534@87853A768;5$#234>91#$;>#%$66##4>#5$740=8763>6663;<46514"7>$62%=9%3394%3<;65#23#>465244%5#<76%34<%622:93=4>"777#.2<%67>%$$7"%=#7":38>6#6%=97?8:773>5"3#664A7653@#65@8641$9@98#73@67444@<434>571$3-9>7"44>5561$4>3953>8$<5$@$8444A85333>643%?<7563@$1%%;2$A3$3#5$;>3863>66247%768$%;>91-84;3<$78#6;7633%>5754.%3=57$34>:5@7664:65:$%#54@36335>9:833:7755-?7658->=9>76%444>66#32:%7"$0?<9"8#%;37033:424;6%%1#$8>651379"$$$5::%<=97644"<63:44/;6";5;76234:#4988#4#$%54<$6%%3<$4"134>3659"?43@75$64>943%3<465"%>55#"32?<9$<:469%4@66#;$34>5#80#@661%3;"06652>462%4:85634>5#34;6623>84-62>%7###$3>54033>6$22%%<656239<$6785660#>46$$3<##657$239746$23$#7$$%3:3613;:643:4>46449746:"#7"5465"09>8=33=55243@#8=9?64<"57$$4;85333@9@39%#9@;$;54>63"<#">8;4#533@995#4$=:=577>;75433@;#74.;>#886"6>8%66$#=2@:#7"32>4;8"54>8"#84;54>"%743<564/>44>9613#=988462$33>:=74"?97"86=0>350#"#$##%"$8;461339<$5864@%23>%7334>8:42=46%33>#8$3#588"=>777$8#8$38"5459<$"5563"@;6243>7813743;364>9$%$6##3>88$243A597854#>:534346986134$#5%"5$5#$#$"#7%6%%747$=$$5:$$3744<97$43"2395"%%76#;#99####$$$5#%$%9"8$%8#:""93##547$2;5$$"$$/5<#4:5"4%7#4"%#75"#%#""575/$6"7:$3;3%#:#4###$#4$###$93"4997"65:562%-;1-$####7$$4$"#23/$$#8#6645$##.%5%"7##484%%%6#7$#%4#$%3#5#%56"$6646$%94#8"%%$6$45484$8#%%$7666#:"%"$%%#$45%;%%561356%%#"9%#"#5"#44#54%%$##$#5%#$#"$6$"$9#4"66##%#4#84$%#%93#%552%3##47%#7$4%"69###9"56"#%654#$544$6#6#5#":#:%#"#"#$#47#7"%5#"$$$:#$25$%28"#$66#3%.#%;8"$5##65%%57% +%$799"#<99755::773278<:98:8<973:$%;673<9;986999##6%599943%97995<9;9%#9<9949=:286789=:68<7#%<9<999><:6=:75599656%;9977<6%59<796?:=:9$787945:<:9:<8<7$5#:;986?9=:964%$$99655$9#<6."8:61#$:9:5%:8;7#$56"%<94#04%7:%9%6==7$7:64$36::5575:;896:9=99798:767:956<$:$5::69986%:$;7699<94#:98:98#%965#8%$9=##5:##5%;:>;:8=S +SA77%5B667"5?56"4#47$4!74"6"4A65%4A4%244A%6#34>4$$624>4714A75534A66"3=%%5%8<47%34A"772%3A46344A$524="5144>5$4>%7##>582%4A56%A6%54A566#%"%134=%754%67235A66$43>6#334A667A5%$4%56#33>66$4>5674A6654A67544A561%A46%%4A5654A46134A7543A757534A676"4=%73"4$461%4A#76!4A5654A$$3%>6$$$=4##44<66344A475#4=6734#9A6244A#7233A523:46%$5>66%%4A47$>5$"3%$A5664A581$4>5$14>4623A571%4A6774A653%4A$6!4A56%44A68244>47"34A55244A +::;94#:96:8899:958:99887%9<94$4$99$$98;99%786#449:7$!59%997#9!8"8%9995$%9995%6999567997%7:79%897:96699;96678"956:9#9777:99598%;939<96"$6#6%6::#%%899%898#$%776#"898%!$ +S7@=#54=%6744A675#5A8738.%9>674488A57434=662%4A46134>55144=67$34;68##4;6$$<481$4=46%4>57334>8%"9A6$74A77444<8:55A67224>6%$#4=%614>67%4>$724A45;%34<68244=8244<$2>47334>%6#4;6714>471$4>66#2=47$#%>6734A67244>671%3%$6144;$724;66#4>6$44A57$44=66%9344A7%345D779=#""7$"%"34A77%#4A56344;4%#4>69:.:%0@67244A6833=665A79%4A67224A66245A69144<66244>583#4>47234=67%#4>67%24>48$24;471%<5$%4;48#%#6%%4A69435A66145;67%44>673%4< +S>676%55A79245>47444=%743#>A:;;%4A899/585D569144;49234<771<%$34>67%44=:87244>#%81$4A79%44A7724$4A691#4>48##%A795444;99$47A=:77$##%4>;#%43%9?97$=57A8;654C8;3#5A:6$$%4#$<6;545<79334>99%44A4;245;77#8$6$@666#5>699724#7=95:345<76652496693$%7A899%44#95#6#414>572.%$A76$ +%%#6%$6%"8#699675:%6#5$98:8##$:9:5544:%8:76675738:0877$::<:97:$:#4#8649;9%#2$#%;:87"#$"#":7;9.#7#648;$68$$"8:%76#;9$5%%57%64%$%8:5$:<9<:%4#5:7996:65##%7"%#76#;8#699#<9=<7$:<8$4$67%975:86"%"$%:#4$";9:8#6$68755$##7:9;4##;9:49548#6466;8878%379::=#97<9;95#":969679:5#"89:"553:9795$799"5$/"77#$98:#:0"57:$98##89"#$<0%"9=;8<<94:9?:<:%8:<7<7$88#=#>$=#4566;#%96$#;%$$457<$<9#8<7:983;876$%#7%"6=1$:6565<9:496"6:$<9/"4=8;9136"995$#79:/49;<8:%9648=:946<7#67999%7#;9;8% +8;998<8=:87=8"78965<986<:84:8:983999:95<99#;956779984$765877980#<7;55668:6983;9:898<9;:755<8;:58:99:7%=$9889#997<8$:9/26%:9259:=<76?=69;:9/1:99%;:95?9:9=<9:=:=<9:?9%8999=:98?:7<576%<9:7845?:<:$67#9$7;778;79<9$8<9%:96%679%;:97<:%9<:98?8<:98<78#9;:8$77%8#8997:<799977<9;##98%=>38::8#9889698456<8;65497:9865:#877979566<99:<9:?9<9937$7%97::9899:799983<9:99547<59;<:=<7>;9:8%7:<:9:999898<:9:76;999:$78699:98%"?9<:98=%77;:98>;# +S5#5?<87%5877354>5734485654$76344$77244>56445A8714867$44<%7 +567<8<969?9;:686:9#75#9<=<9=?9:9=?8;<9:798=9:9@:<%9899;872#9>"99>$=="<1267>"<#4%:9;.654$96"99:95$#:9"4###83#:99#63<969553:9;4688<9:996<9767#;999%8#6496=<1"<8"%%8$46%568%8;49:;#:8<:9:?9=<@@5%<7879";:9;?9=:9:?97<<9:?9<995%=:=<9:<9<:93=8<:9;9=97?9:<:93<86653:96983=8;454668:98749:99859<:96<9<99:?8<99:<9<:"77<99998=9;98869;#6=9<=S +7699:97588#9867:95579:6%$#68955$:::94374:97$45"8:6"78%$%986#$8"8:95$$%$""96988#:::7"84992:?$8;"596:58;68;47#""$64$.2<9957%#8$57$:8:977#9<995<87944#9;9>:"9<49479:;986:948%:#48859;:92#$9:973:8%97%687545#%;6546486975699%9565$9;598##9:6%87;996989%5#9<55%68:7%#7:6$657%8998699;:766:5""$7;"653867###499::6%#89#77768<992$<9<:832<9::97<9<:9:7#9;#9574<96%185:;:949:97778=?9::98:99#:6$79<;99><$ +$%$777##55<%#%5=?7#7144>89444=66$4/:67144>79444>577#%####7"4=%79244>99344;7714#%6144;96244:#66#%;6$#44>67245#7=9$4;675.<;<378$%%=?$$99244#8#676#33>97244=7761#8#$#724%=76614# +%9445?$7%45=78#44><=;6$74?79449%#$#3%7=6;5251=%714:9#65642=696#4A69135A77345A571$9#A?::8##75D89345A41=%46%79:4#1:::5%%4A7898;34D8:647>67 +%#5<9:9484::97:9677#7:99#<9#9666$:957"#89;7$":93<999%$"94$689;87#=9::46:;%9:<88;95%$<7:=$$$$79$%%%7"=29%.##99;967<866%68;#4%#:895;979459$:59#"=9$67S +#7B9955#:A8;7#4>6;6$5A975$45A97$%64A677 +""4>#<65$#<9:9#/#$:85668;$;$%$""$99:9806#":9;1"66:8<;"6%9"""87%#69984$8986#":99#"%%"6995$":9:97399<"8%55;:;977:589<495%:=:348?9;975#59#58%"3/676>=:<95<;"469488?=;=A<8:9<44588>$8=9764:886;455:59877=5:896$>4%4/:?#;9788589;1;9"777567=?;8869%868:=:9:=79=:97?9::97>%%#9;8<=?::>;68=;=S +S:::$75A88%%4A;>=654A9=B:64.9=;5967#644>66%$4A655A67$4A76"4%>46#43:%81%4977$44>= +S9555957235;58#89D:67458A56%344;67544<8867.=4786$3;$88"7A5815;4744=:76344;57544=696$9A77$44=$79;%85A57244;$5%344>6834A77%44>75%714>66#44>887"8>6634=#7234;623%<58$4>724>7%<%7#34$6%96724<46%#4=572;56344>67345=672$4::334A6914>76$44<8776$85A66324A6722%"6:4624>561#$344A8;73"7##4:$89A8664A9764$7=6#4>67647938824<79%44;69#4>57324> +S95#55?793.<2%4897844A87344>47"44>77$%;%737%4>567;644>:%9645:8?89:/44%4>89:$$74>;244>77:0<.<>7864;67%44A793<0$4A:#6%%%"4%67$417%47%$%>7;6344>697.35;%%7%%$4%6#2%4>4713%"34>%875>46$<.$64;9;$6144>$89%$<65344;67#$%4$<#=86676$4;%75544>69333;9$<2249494/>.:A9965A9;-<0$%4=81"34>69244#:690<.>A6;6244<79#34"96%7%44=69#3%4>77543;672$4:5$5%7=47531 +S76B9944335;;5.=$45>8$%#%4=46#4=47134;661%4;#7$#4;%6%34="7$%4>47"$;$#3=##7#%4=$8$%4>466$%4%8%:$<46#4>67%%;"$7#3%4%#8%34>79%44>771$4%#967#24;4$6$7% +%9%55A77345A=;9773%4>67%#9>6824#$4>67244>%76144A87:/$75A776:96#5A69444;6!.8$"A#73"$=67$$9$>6$8#4:;7?77%%<68:6>#4A68$44>85:%43!5;7$7%76%44>795675%44A57344A676A37;89?8:=49494:%?"8$44;76$44>98#;$44A:;:14$87767794$5$$55"44.6E77245A773%4<678""86"64"9866:$17%<##6547:1%764$"%6>6718%66:084A:#;1!$$%!:?79234A79;%64A67;;1%4%#6"$6##4<::%6=8>9=4#379$8:%8=9"#<;$554=65:$%#::5><6=9A#$!%4$9A63428<:89245A8#?876:8:8=67#2%A;48>3:$4>67$#34>67"4;#;#5;?4?$7%<9"%#5$$89671%3>8#"#3"#:!$"9"2%6$"=77#72%5<%<9;23A66<;54$4>"9<;<8;9565<76:<895%0%%7466=$:989#;5$;5#$244<67$75.#76245>4$=:/5<<:<6;%308"!%4>767909.76:7"9:586125#7?875%9%6<9$4#8:79"$#44=6!9<58#!"9#2<$7%$71!69;8=844"#$61418>:79:04%45"":461344A89567/%1#7895585?78:2=<:7?47244A857!33<#$144<;:6765<%"97"$7834!649;8656#$#3#97:636"%44A69648668>56#4:47452%4A96625%#%;"%683.<#46<<:$565674/877977?7A57344A:$857:8A464<5$35:97;3471####767344@97:085A<#45A8%83$$5>4A89:48:2<#9695!"%9#:<:82"=618<:9;777#<37>?:6%504%4A9#@345A76344A662#4:%6#3<%6227%=899565A%9%53%"%487344>66344A%$9-624>4;68#$%465/$$""47$2#$;"5"""!!!%$"478<%797"<##2"4:9%<#$;8;;;#$$"1;3;7$8474#8;844$5"8#!229"""#%%24:578:1>9?8#5;886562$":77"#"$24;8#76%5-?56:/#4<56%34%"9#68044>66%4#7<$"6$#4>57243#=81$$%95$4!:"4%89623#64"9!"7%!$;3;9#:8>877!6""!"!7!!"54"9#;88<%57""$"4#64::%8;%#"6""$$8<5;%86"# +S455=%7%<%7#4<67$$$#<67%=8334=7923487244>6%7%%4>5964#5>799>%79;:1:134#998>$62%5=:7=9%445>77%44>%8%44A#57734<86 +S8@98255?675#:;578$#3<98764A89647A996#4=67134<6"7144 +689<:97?:<88899<:966::9$98:998?:;686#$%$897:4#:94$$9<97$599;:65:8:5989=97664#::;997$:=996<:#:96<9:996:9<:76;9;77598;5#$55449<983<8;944$$88;:68>#9$658699:%86:98<9:<8<9467:#7%8%798B?94678=?969:?99$98B=966$;=9957%?:96%;=8$45<:<:9896$:=<9<84;=998:9%;<98<9%;99:?:969=?:=<939<=95?:::997??<9:<99?447?:=47@>:9?<9;879;98:@4::::<>@:<:9>B>:>8;S diff --git a/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x16/qvar.0 b/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x16/qvar.0 new file mode 100644 index 0000000000000000000000000000000000000000..483ba168e048d9c636d7933d4f7a38f5149af698 GIT binary patch literal 32987 zcmV(hK={7^gXaYyA_FWf7)Mh86;XjYfkJ^*0RadJ3NcfGB7-M_2ZCjSaDpR)41x)P z9)TNZZZsKN1_1{F0R;V@=K!Hw;7T&l005G}Qc@t-KR2~cec@P6&;@u_ywMFbzpf~< ze1!vnT=fm2g7cM>oXx}ray}V-sRd~xsTqp6DOHe3M1+P)C9@K+ddhi!WMC}MnX+iIZxFan5AcF z2ACFGAfy5_Acy(ythwcny9|Q*#pq2$4~NQE&flum>l8fMrvclqa6z`@P`3W-s2yXXR1IGjGgpHl-98b zsi9`mCyya1q%^=E9=EJb`jMBXf+Ffp+NRwy+kwO{qoTA(;?CCQGt+z_7G;qi5f~CW z0F(NYq#+tLz=g@CPmV@2JRPMLf|7HwkYFiLn-LII6K}l6(eHklZ4m2lPNPy$K(XU+ z)L^1eQxIKHU9eoRQqn7t3A3aVN{Ao#g%$o-wY!4=jpa}TQ{!_*B)R3G?k(#EKBwT5 zS4yB~5VJtcG-NqpLqS|(DR(2EvA=yNK>c+W;)l*(2_)U`0`{vEniiY?mEi|(5iPYn z7Kh^P0v^J>4=JYq!DH@o*qHY6<=`JwsqZAo>7UPNS`s_g;7@#LV;NS5+XDuNcsHHZ zu3#axTa?X1uYj^%>0(B#hMow+S^4a$WmzusJUJAt z(rz>_zf$kbTjD|14LSFAaGUI(Ksr}gpzbzN$WH7v=-f;YjtyQg$tYppDvq3>))iCm zUbbeOZkfExgL(1dG&FRT(jsFWmi%ddk=`mEbN*;$EN1vLz7L(D6_KB?(_vpGY9E2x ze~mD)$FDFte7O^x8sul)z;6Y=N@Z2|rzT=14_8?&*IXV2v&crp|v2|*DulE5EX~l8t?bNL#-4K_pgV~^1CstENoG4jIZlCtNmvR zKfk~yX^c@jTDY7Kgg;UGx8#CDq@VFJ<%UXjS5sHq<()DnPKB(x80^!_5a~~{kulw&MD#l-pII0=HU|DDxjeWDc$Io+r zO7QA&fmS?C8l4~ed#?}c1%I1Q!Edn}#AO)mxV$J@<9uxYvPWG?z~};ObS@ehDgVL! z?0tpoyw8n)iA#bH`v2$(GTDJ&zgqDYj7r=2&2^sYe*fL;X~V!6=^sRGQy-f>EG*q> zcaPf5EgxZm3xC+@3~D;(&`ENhuND;8kryhl~2sx_7D2+Vg0lr z5+?A$>cf7f(?mDY2v99IQql||=-p#%Me6tt>0frF*hQLNz78}u(LIxbFn5-R(Y+d+ zo1L+){!?)ViGk-VieJ(s-!136L41>$-c@K>S+5hll#xX)Hy+q}+=h~v_Hsr-x(A8n zhA(N4p9$UGLRi<0(A$RTR*M;o;-8pKR}WQch-p(&DAgdx~FA!kjhgq8gII5w1$d9bO>mTJ>bsVMNHI~z#w{bnx?>l*2Q5t+2 zi*5toWi-h;|;N2J`uIVV_s85NfSnU9V#;vZ!OqeknYEx|ZGoOW!52lY=rUTL#f2!(1MUP|j^6*-kK@8%nL~P#Lg#4;eiDy1q&) zNB^`Y639JX&xQHd??3ZzM$KH9Ixyz9jMij*JUIP%>+g!GD?YKFkI(p-hN-%Q82CtDst5>^h!x zM!wv~f4?B7-9MbsFcDZY$s%5E?(2oGv@0>sdyRo?5#LYwh6A#04=dDbTeLN+rJ1Kw zhQZB=82bxfdmPoFzQAQ-MHpKaaDuFPiLYT_Pk*M)Ha#H&cm+$p0DOC&QVIp%UPyj> zkklakCGpE*mM$3=u{DIafT z(7)Z)z1O+{Mupx`AS4t8oB^ph^MG$e-@ z?-C6fr{gB&>ZA-NK`K*Ftdwp>&BML#mdDe>&+`oBr2e%j4)TcJ&X_z@o`>isW zlkXXHtj|TI&Q_M%1(&oE#()*m9(RaFPC$~tELDd!d^ASx1+<&Ne&TndGLJ$O=c{V}hP+FHYsKu}X1;|6m{VwUX3nmh8DOx1O zF1A%RqexfN#2>@zQn_0?%VMF`-62a?Y$NPxHrQ5l1?}QqEtfdPle9Uw(AmVnlECIr zxk6&p-!RdktI+A+Q#aF62XE){`BSAlV_|pDRCNuL-4w+kC>2Pu|3c3YwI&}&GuK9^ zc~=LZ$VMWGcVl)Xm-<`=!ADLQZqeXS;fL!-q22`Ly1#4=xa0LmPt%D7_U>K%5U2KK zhE0*O>&2h7pX1v0dbD+LKa17xI0wT(n1uYAK%wOgXl=O1%$Gu`f`kf2am79#R=CL5 zxp*A}ep+DZ#erBSg9{Y9nI}7RidYbOH1#l{Sp-*o z)<{vwB#E{&cLw04_KN_ChzJwue%lH%y6qmhRzm`$)!czF?AUG=_7}9?4^1K zo{Ma%C79o&1osZ3PG%$f3spziX60QCHH>PLRaJdHAJ@iL&ztx)k284X{wq{>8>ptm$nf?&h0#gT?mu3>17R zspSneUW8eI&p$Ng*k2G*^c2@<0`uO+on=-iwa|YR&>yU8;#UOkM<&HXnXwv@F+fM~ zsZsZb1zX`0Bp-%)kK{6pSQymRN+-TC z&g1)j&;a*vuU>Ag_$huQNp$6Y(8gHzNk#6IDzOXj^wegFd&%PJH?XP#UZJVLULlNL zHNu}}T0=|O9SLz5D;B#%N~l>$^7eiDiB{{DKl1Svv`2k-8P#2T`$Cmc2Xspc`gHzU zA>G8i-3L&|M<)_r$?fyzEeOqeQcBfEqeti`T|(#Y3DzCnz?j57DCa73rUyA=B+T5B z(bP(q9K^mTK2n?~14hBErXJMj^&b5AVbF!^RXIaQXOjqe3-DT5n}cDj5xEa0X-Ig4 zss(sm0J;u!CG=~;5Wv;f#hEfAP-Ueu5FjQ<1oM%7+tu0vW-hDfh1P`RbGdP3deO!s z*%1UF|5>Qu;)a%bF?~XFj%GIkzX33pr|0w(5J1z`u(OH~0|1Y(?s`pXO;p*^i;zV1 zF{d#_zIJsL!)An1qGev^vj`gVn1eFKgzCN%OvUafFEPTgXeC6^p}4BLMiAg>@gQ5s z4aK?EAkY}`3L^910AR{h&5kS3@c=nRT@8wN6)t<_h!naVkEF}J2&YREzOAAYZzXHt z{jtyC#j}Hz3`bJvCTDG-&k?ex%@&ZQ6DB7}ih|IIY}9_6potNHA1*e;b0^^g3i~|; z!N?Bgr<0`MRQ$#C~{)WL8Ph3tdgoA z;1@H)JLPd56PR;Nb2q<{*rWQGn#Kafb{GHUYU1REL56Da<`^||{_Rtj#fo1JTAy_Yn$b@J7)@o`nUi*R8Vfyf}Y%Uzdn)JAnR!8CGlONMo034X|i^}_O) zyhUSDdB@@P*u=mUC@f{SZRqjVPW6)}W^MLya9Fx@fkGmbCwg7!VOE~Q zfaJ_sh@cOX|LAlEjpW^=HN_XRr^6UkcWW`T`CT=qaso&|Fq%AvC zic{41AOnAQVCPLK3V;o+oMm(E?%`xZ(Dw<3vbJ4k8nG>uYSTnjh_)iQ3uj1{bSIBy zfn`)t9d|+%l$Sq&+c#Q*e8z%EmXt3WI0@xV!t5wZ@W7bzIl&v7rnq|JvWjmiXtum- zK!v!HoP{4H3OF!XX!3K@t0vhqa759@@d$|2;iIV5f(JsOt`ZPd8^9g2any3Bxw3im z0moK0r-ClAW=(7~u*JvcbD3R(aOo9R-;;ZQ_ke_20fTA4RRuWVD-j+#FhJ>Xpi)EF zlLxp!GSHw~@^wW8jYMyAGOXnV_uXm;4yT(Ma5Fruygj|WT|8J|3e}A>E<0d_a-1v0Fs;M$|#(bMsfDnMIuARAH^tu_EUT7hkb9 z&UUi3Nsu|9Rz+SHPgk)-AO&fpx+f|D+CCQnNI(f^&whp|lrFU750d-E8Zp>I zWnr;~5B7gtextHu2uhJIiTOnoPKsuk0436A(t$6>( z$Ks)VPNsFuiM(m{%qiVvL_o*EjCqpVdyeDKU>fHAfyOL3Y}q-nc^0(KUzo}_L1_3a zlxvhP2Wh^Fd#wdivppNHHrfWof|Ngfak#V2kzY#e+FiyfNM&h@>gm~7#w2SBAs$m(F9%q(%e3ux}s za36JiKFBnkN;9NTB|rxq(Y)Ipe%TSpRtW&WpuL_>3gKp*>B1=r82lt3xfh z=Rf*9#dl0hKTWN_!mYDvioB4CNS{s^%TP15anGNFx&T0{^SQ@pC9>$i&rIDO!5l$s z8BNEX6X0jAorto+ihN1xi!un(>0 z(rXO4GAWzB(L1&N?BS@nJs4W8hHY=jL@B6H>2xgTQ(K15s+ltl4nek`J*;QI^XV-r5${WWLAy8-L7UkR-%VG-XN%7o!`^jG#0$B` zj8%#r*J`k6vuRy|!O0|<{s z{(SP<9jk?n--&?QT?n2WC>&y2ej1>7U=XOH-K7K>pqPax71_si?T5SIK~NC7HH->T z-ir*73Q)h+ClAXVlIoMsH7Brzz<<&#A1WYf^V+1Y(krCtiLSjSwO6NKTw%iONdI`t z!)!sX{+eCiC|RZ7l{(_sSib$s&e82m#Wue9=Ls`Yilkaf@atP3C0ybC98E++PH90y z>rBmQiU=NjB|@B+K%dV2Q*gRoP`Oe1V{5NGKI8w?Eb`x^?=E&Qg(M$AC75iJy zz03(3?kij@Z!hgAS6daUy9qw<(T{Gv9V^gddFj z5%A8hJ8iP-S2r*PKACOX$2nU?pWTqR)f_k47^pQ9Ylw1u%U@wghx8&I=9sHVGz<6* zXg(wkWZjYX1Y#0;VH-M z!_HYZ^lG|Uflf6Xh$Hf2;^Q_m!4A%ucUcZyQ*Tu_P(SW!#F+jaky5!OtY{u754cXc ztmW)#L=Z$h!(4iNs7TaJd=f{*kvisz0z5w`J9x6)lDnr&WI0KIX-?VLmPXHub-3c$ z&UsM9hf6nvOW&6bUMRYZ2P(mOX<+M#@7)*JlMa|aSh*7SOS64DWSH~1kh2FT=BX!x zwTu1@<=M-rCn6U{Dc<|R9g^H`kmgZ@c=tj*vKW))gYEMG=~uIgLD2>LPU`YRqde}5KRP=0xOj+_6Mz^$z`;p>B{QLt}LZB<)dCYqId z%@RZWE_j2?zpPAZaGFa$c${tOHu!e`HfzuQrZ}4zT2!HfjZWJKlc=O!Pqi)UpDM6n zlL^OPcC>wq7ir#ePCr@2ZKc#aH}xqN9p>4d65O+Q7wAd@lE`+V3mfE9{|H z)ST67D&4S1{n_)(FM9WUZ#T4(an6_)6`F=4>kEn;pe~pS(mgcKncIfW-LQv(%e@!^ z8jrZgWhb%yvar4BpxpJe(f(F^O%uj#Gq)8NvOv@$usi|Qnl5Ye>G(wZ!oaj3z4ORs zQ9VKNewmtCE@VTRC!|z!2!JB(7Th#hndUl>BjXqiJBnN%K6^FP%>LQSRFzvt$~Jje zPt$#&BLoUv3O}D51!1D2uGr_l99mp>Fo}YNrrpK@$T#YpF|eGL%(L-J0mN;(-%jHC zSR`{Vgz`r&@Z)uc5AriyqkEG2Sw|5^Ouq$8^-5DM(P_E*MPGq@fJ`N}H9VL3h$$a1 zqVuqC9olnT=slmNe_Rp>ckaekuh!Gf;4i99e@DovSiE<6)LLJ(o9sK-h7NAOl1|@g zo{mVmyCW@w1{hDg1Wo03^_PvQ8&Qq;_aY-sH42YrXgjaoMNKXJMQ^uisSL$su_1kF zlIE<=Sz;18e%W8J()O5ICJHgAlSfQGCThyQ+~?ey302O)pJ~z-H!NyxX||lOD=%fTw5ky1cgCU@ar& z>Ga8AdR_5emvR9Z;OS8hv;Rlnv&>BC_TUSArS zSd%EVp+F#68ER8wZ5fFiS_2I@qrSTqO!IA~kW8|CHa!{5vM9K!$sX*SAo^47=38WR zq>DW@o$mGHzNU;hu+=)tL)|w4fbMP&^ueIY6e@U{;S+{T4GQ)4y%m~Mksq9hb$s0y zU1Jhkf9j%q-tP?^V?FTz#GvQ5N}etJ0Jk@XHX62jfeM3O^%fE3u5-E!{r*B^ z4(WG{!hSRWvncM|F)yimdp~b`@bNrZnb#jHwXM(-xU8$GM>JC>bYtwpQZzTdJUSz{ zR!~E$(!|IAnuR&`xQODGrkRpo6Pve{zY=R=p9 z9k6=rx--DqC}RAqME}FAuv!f8HN1@KJ}Rx651`3HqgGG=eo84&=N0$mG5l6pH#;#Z zkaFvH1psMkDW?i=gf^PkwRR0KGbS^`Y~Pi++pAWY16?yG%x;3Za?VFc&}kwg^fu&b z*qJcF!o(1uvf?DiP$vJNq;582DHtH@|D|x@15&g8+661ICWS5 zPicp&)=6M>Pxm3+$(Ic6(8}mGzaulO^Ia5AGk~ucPG$DSA%co}RhKkUP|8ra{~&r8 zC*)e`W(wo;{AiK-(5b63n@+DDb9nPu2GDP#*r0p&NpxYGd-L1Zsr^Tz%;J-(Y?CJ^ z+?FGHLFDhJA+0Sh$lk&Fwn%<{UVnSn3LIDM|>CVSLsEM6#jx$fSFjLqtgr0_%bA`GN`%=>{HUnwDTF4H~`K5!I% zG+msEM!}Rpx)}&y)m&^Z_N=J+*vRNhhBnpAlw9ba%UB~u4LzK4N;``A<*4DBeRz7< zg;`+v$$3}y0I2ANjC)_n1_r9y+3K{XkP)2sUFtv{Ya2johjxi+w)f-0gmX0OApKc< zvHuPb$2jBIRt#EdWxFbS(cVrcrW?SH5x`Lf@%o`LJT13B_~jnAbsI@Z?h;Qoer;A5 zT(@H$(fj?JsyJ1tn4wD?fe*aH%C2jk&tsh1?UvQ`nF^#4QMC>|EQ&XMv;UHQo zV63YFZN0R*C1^UndOnOy$UKh}r9^x-LP8w#Dub0wzlv2hgOcUDUZlBBpyZ+ma zm6=#Bk32vL-=QA%7OF8zlL-!xnm1#;$Z5?^?{<+UI{Tn2J9Tg|Fw?fut-DYBg%bh5 z3~Wf+2|*5LY2T0f#z&dX1-atu32Iru=-nC-O}48;N$=peIC`!6go|20BRYQ_i_F>C zV(2$HdD<6^4~<_5+O>{})wr$Pj2`Jjg`>4XwKdbbUDI8ixCgt zHk)qS>FfAStt&@e@=ZyXG``cxhRI+cru8B?kl5;CulZV61ncST`eUUOrBQCz}KuX@J ziJqb!(P>rr)4@|@SwHLVT za*WcZH!54Rw=>EFZx|5f2_Q7$`J*ZGpPNh6f2|IfsvWjv9CTrC?x$2pEZd9!c(t<( zR=P_(qN_$&z-TM8YPnrxeM9wY9m`(xY-c03Kbl{FayT`JGqlt`1Ktgr`^i28zTf>;}|Vi1M>H?&JM%P zJ1TwIa~Ag4h>t=n0E@V$%MfZicx%Q>ck}hSUKKYzF?T?gPMpfExvZ}!0%dnzCVSQ? z7t5b5o_jWVXz=LI*E!Y_p0~A=-DnuH!+!?ZZN>)jW(1oq*j5|pJGAQmW=6^R`Rug3 z8LOtgAgrNJGN64-zZbKnCtM6|Ti&{!rn1;5^ez4QI{fW@I9CFPg!BO(cFJk zKnNIowpeGp&J29HUtPHh=$8bDo*c+26XPY$-@}7wF4vsunLPwP2+YdTXFC+IYoCnz zB=3Td^r=;_tJ=Ao{>W9lXG+4kB&goad(;pMDS2!b`A!LN>3Qy4$|wzKm90Bikt1xM z-uJ$v&*c&&Dy{nOC}6cd+p62uEP$!?E=<3#HT)W3sHb>a1VC?xPj<+P61h!^++&_e z8lMB;N=f&u0cpT}(R_=nXcdB*F03z_D9A`#=y+mabH-Uu1)|pm8cKjS?g{Gtc5A1@ z6ip+6aKfPtBxRcC3R@l4BgnvV2#q)(wx54?ZrWTI11{m z;Z3A_Nw$%-fE>Eh<2H3|vMLcTw>Y@9z3#=>Mjft|i8}*~jZYA&NC3*Trv1$WsaQCj z>`B8WCV47lu}LFJu!w6I7s}CZzD-%hVf|}NGRUe`<$eQ=TFv>liIZQi=y^Y;gI3$! z4`b&;usM=5$xvS&$Oi1^;FRn-GbpTdH6i~4>$Sm@qxk`$MI?{t3|D}9)>>`om6!-Os ztQSZd+&hBb?>E+?u~F|xGjpq;spX1hy@wDh3CgGTviSc*PV9OG3sd3v(q{*_z#SNB zv6vGWYzRfl2O$CE&aF3nN;87MZp3!% zowN|`MlUKd-(*TfVDjGxZgkliZ5q`*cz>O!t9Pi`4ITjXC#sEDiwP`T^5tLc0YmNF zZX+;8#4_!+EM5ra5$PpOmD|caC|_`fX52?BWx6D)4XmzXS;nu3;m2 z?BCXjmK}uJWeW~Z5%!DQ@rQB4nhG`TrodsMN0or)kLAAi*;$4O1SiE3VDVehwlDeh6Kk)}OYG2%4BP#hX4FhTQ5oPdY;TNe=+J z(qa^dSr3{E$rDQtTLwAJbs_tb?5v~;=h#9~#WK*Nt-a1YZR!d>A~%T&<$f!MI}8H= zjFzsKZ5~QYgP(mUOd$OF;E3+651ujLHNfa5Ek3IBQSiOQ8JnW) zEPq5!o}(LJx@{;rCQinAIG+hbV{6u8Z;-D+!=TPl6A%E0Bu01p-k7+OzpW)rp#OnV z-(ox4Xo}T_*0My1@S+8lM6Q-f?_!r=s^+kkA30jOxKhMSL!KPN?&9jkrA`p-0$PDO z(4R`CO~Xewcg^k$9qK=sLGfW^on_|%x>+cYs^>A9benWipXSWt;4#j_GQ^ z!DbbY#wVe#v%@bE?)W{R3ip%1`$U-&1+W6J1mGCdkk@2EuWw*lKz-p4!TH8t0MpBaoFNi`rcayAF41zhP=nb= zU^Z`4%Lp-|+*i6Lo#HLOsDiUEuldEYV2o{7Oy<1L-r@Q}q9d%JZG}I(9|$n>#eb=1 z`q{Wg4j@T@?wu)MD-T2EirLO!tX=%?x{)C17=_B9CkeokSZW!P7#J0P1l)~KB zpv~eq=NBq@j8x+Q3D_0?Q!u~bNA}WU=S&7ih(R>>tpujTzhos3zbN*@Sof=7pRDec zy{1Wlb=gM)cRn|GT4NuVaC8Oy(BS7?T{$$No<)Lz7Pb`B>{MRKRWNs=&eI0JN{Rox zSRIkUFDFHG;dZj!DVXY1p40(fv&r)#ZZ$xE;~qKZ#)#r$PQe}88MM!DUkL+KLJ1W{ zS=5N;%`?MX`W~F{#rA}%OrtWN0pmGTq?J0fBe+{J-uPjr&$6tpGKy)$xF7R|X?hkG zADCQ+Q|Jyiqr?X##5g>Ug4lPJoAxaDMWtC-41*r+2r3@-RD{S-4YBD?=((rR3VPEt zD|Ho=<%F^nyJ!rLWav_91yufYMufP8<^y>JZlrW}_yjI*Y%oM|UL{lU6S=LTD6v zo43p;E3y2|8sAL@xrD~!sH7Pnj8F6|YY5nnlt`R!+Ul~3`@wZHU4?EX6ThJ=3gb?I zFR?75{Qdw`+Mc*z^)qzhv@B_JL?wDxDMP1>YBYA07KHxTbon{MT z5LIaO{%=HhjnQZb`_(Xi5XKvy17C)I0Kpbm{S^1WM-dK4TC!Hw%p{+;S=Dotn%PgI zMv_@nyO-9~Y4PR)kR$rq)+4&ZB4j#0{KwW3*Hc?{u4h>0Aph6M(A^&sd;zUm* zFOq)2FaH0T%himfQWmvv+E;N@Rb+sS@`|Ui5VESIw3K5+aN;(0m&12%*EX-Q0JLKx zZxu+2$f0)w)(#OQG|5F`T`r~x-Xjlsw-yfK8bZ6&WJ87_Cu3By`S@F6w6e104COPg z>h~?8%yC{xS#3r>P z8FU#hnfA7`_DrACPXpji=ll0jQXzkdR!CXpUwa3nCE4B72x-;c)D*^Ucko0MD6vx) zAN$hDZZ5cQtK6AR1~u$xoDYQ#xb@71+m6{Bu72Cyjyc#be{ioFwa}}@8 z+`zu~k-yY_Wa?fW|FigiEVL}=^H+$|W;2N5*|JQ8#HO)}z^%=7R0Th{2PMErJm+LY z{A*DY*FM4s!?P-3i+HJ-cBRy?8z^ zgRbUz{1QtCjj?z%!oKK5S*Sb|RMRxSzlWdk8f?5cD08acD+0|!BdZl0BV}T?YqWDQ z@ts|v#eNW$e-&3gI1_PGCH|ck-+o>qnew4dh4bQ6(y`^SRZFIR zW@V`c9Q9f7M;KqxF@Xou%lz}O-9^3OYX>V5{w_-394M><6!k|-kE3=(U3rCH@zvdn zIAfNP=A~H%uD(geYjWbG$n~ZmsZUteL$B;dpz-#!ke;Dou+TQZ#sg~va=}(UdO1V) zCc}%w9^aUNw3H=p?gw!*l_}+xqak3Jl>ECAPT@!>?6H3_BeEy8{qOhdqHj&-KOdCk zhSiS=G}Pr zAe8El7&>)8oju4x^TyHD-WB_vR(KqUc_(+Z(SRTk;@?Bm_Ll|3VbwD<^3JrhODAKK zU!!T0K=c-A%->Dq&_?kTl-?rg)dFb*)l%a0qn~mpVVLPQeguK{Y@`D9Wh+q>Whp&( zWQd*-VSgI1YJTo3$;%V9+~|vh-+wYT`vl$|+w*3|9<>v1TACM^xkfcKfV55Y+3w=e zt!3eZceA)t)dmb?ZT1SY{aX~4pg5hk30DjjqouM1Bez|V4sPi$h8QYX%mok&i}CyL zv~D}JB2t}jcJk(hW!|Ht1_w})8cjS!D@85SS{2GvB`85M)KKTyOk_sErlN|Xfhhh0 zd2p0hHB;oI&qENxsg&okN=UsPSZ8Gsxu2}K3#&f@F#4AO;>(BB^`+3*;UI@zMFh(* zz;`Bu%kg*<5s%R4Q0CAjR$}!aXEEadbVTMMP|aKi$q+?dx+b26&cDI012WoD0ePm z1wK_c`=dkTBQS*hvmUI5?m~8>JQD20bO{_J0Y}T^?y#6pX^Bsx-a#QkrOx?#v{r0q zzo@g*k`%zz`{<$ca|RBP|FOrddV=6Wn5HrVdyV>`Bz0MX#sh7W77Fx2<7g%1fX#fx z^C84UK^orCnbONGBFXPQFXLhM`lZK;o`ysq8lKLN=9F6iC@wfC!x%cJ? zZb(j{#*^GkJ$qYp3S`0N{Kpa;{m5TxQGS~Uj%*v8@&$Z8M=%bJy($^LewPYt!iS-G zHV8{jmD~<&di}T!`wJ&=kXz$xCaJL83`@$M6YPxZC=eMtWgu*8k+_FD8H{!~QvH(~ z`8&3rC)^~f#A$te_|ajT>l+!odinY5qim0DZQ4Enh9SrIq3o9z@C@&~{MkT2wTYd( zH^QoGgFYkll7fE_)$nsv%C=Zzer_V zBld|>$TGiaC=f58RYLF47{d1qq*f1nF0qTZlHn1+!ir_9V4YxE&_ZG`tGsby1P57g zt71XCx1hxG2@Ev3y`tWc&sA@M&%D;aemm?Cm?p-O$@15lah;zavLI?gRT*x}ARM>` ziMynW1brRmRF4zv#>pDQf*Vl~~QvfL(IQ?J8L9evN2A6<^WfQ%cg ztN7N7)_wbZ$@&RRk@0RxQ|_XzIV(t7#OxV+fa?(+=DV{D2o4#RPCNYU*r2`=YG!5s zY$`W?O`g%rXrGd8U=PZyH*U=9@Xc%-GYpx|Y3sj^p`DW>uc|HZD7u@@q(&thtV#tO zEF1Ma)@WgNP^&ricqmZv-I+<2{fTZED7K1rm-ZApf$a3Q9ZKGO>-*w=Svv%Mj(Ydz ztm4)d3mnWQZahY7E$^>YWNz#j77Iem?cHxJz{_5L)_=Xx1y1Do?f7RB6(-nfP%NQ7 zk<8pWFDlCI&5e*NBAjG-Pp+Mt5SnXS1k#9IrobEui>tvrU+0tCj^2rz%%MeZb8pn? zWDp^&Ti_Xjlos-uBfTrEC80eFz&r@<4W}h#vbr%Nw8Uv>2_Aoai6a4 zzKrGp%ZI_aoE-xVK%G_#CYYrfaLngP`XTMt+qz2FFpU(DL9%jsr91A=$9zsFd%(f2 z$}JcW6-~0BmMju}V=7|)SBot)U(oZ)^$JL5tt}Q;t?>PqQ}9;ni-((WeBL|Q%00#E z-10-UsKUHGKQgzG_@T|J=7{E95T*!uy56>1h~Q@%CuAncg!kyJ})QdrdlwUVAVBW_v_T3_pINK|YYo6Qx8 z6%Dn)*h5RajMkbq99%PNj<(5AFTo3|wQ%(6ve|j#t?8n@WXQ?89+-wO$hQM6f4Nx0 zqbY@}n+qq6p}H@pSq{ulffV4e6iU4TC52?IroOq#N^5|RyADJHH_qv6c&Sjg03R&cq!W5T|dvY=3Wt)H=$)I z*4_Z2iR;Fw17Gm{X8 z0u`Xp;7-)47_W$XY}Sh#-LA`z%IX1K*x1+O<}6@D8(kqxGw-Z5)*$3Wb;i2>1ESmB zI1fLV5Y6#UJv9aI9$Le1v>)a><*ilg;%y`k_ZYtW=FVu-9ubgQq*bjXk8gIj7 z&kHXesXaV+P@L*stX3|D73?C!)xN>Yu6iyhnA$NFbF80z`LP)7St|Ju(|kmnzRb&g zfp>};{D92~lKCD5nW%3Wh`cQx+6l`R!pPGYa6BP zX-qwvb=g7ECXI;PO>4*~2&!9qlU?^L$%;_85PK`kiE2O|0PT3;o$y(dWf(R$`hj|V z#~p==#{sX}8;)s?r%Ht=tAhm3U?x2yAc}4+lh1u{H#NMeLNhzD?Wa_ z|68!KY`n*{PF3Oc0IBuR<$D-^Ka~{Xaj8b!WyJEU4a>Or{{E%63 zJt{o7Zk8SV+JJBfj%Gg^r|ah2w3-@g}sXjN2<2qkQt392^JILt)}rm&JR*%zE>%zeJZ?bBjln z-nCK6B)RX5ObZ8SX71`|+8{Rkn@a@>NS)@XGBq!O%%-{$XkXhqD+a+@3W39Iq|sD2qY5@Sbtco|F+9W1HO1r;N}|zq&*oy4xk&Tj$yUjv4~Xn>avfaXX8Ap8WejF7j1vpgWvvj?09A zvWqJ1_NtICM*vn589vqw>HU{UrA}#%cKgMCTii~0rlVtmg3|ZU;p~!Zti8M4-aBCG z3oKa9N$lDBI*ZyFLE($;)B08<`kT_GyXxmhN$)%~42wEGerB`x}=G5xzdhK{{s zIEz)EiHKeJq5#X9ULMJoS%v^RK*YbHDfWP*wNn1POEq5ls#-`Sy6Ni7D;nK)zlI#n z#KjOk+|(-l%h4f0D-eNatCqa%9VJKWDCCGe&P;x`DiP?^m;3uaF~++Fjh3y@+%<tdGRm<>?7dXx*)yx?v85nr>YuNw6tVu@0_KJV$+y?S`A% zXeR4-_td%(^xbt+yHHK~si9Hd5p$cThRQe4^15*yJ&tTE*FX;WaC%aX)-7)IR_ep_ zcSUyJ9LCynh~oAO-ZH@on^^kWDCdgL8HK5=YK9OA5qMx2@}6E=-P;JLY!H}D!hV^n z1~wNm5(G{)Y3)ev09x^9mQ@m}SahjBi^@UJCMSIQCAOIjhJ@k18Z>s|2i+01`UD}# z8i*NZI!vaq81KUe?3;V$9i&_;!};4F*UCkpI`Vx9$y}PduzeerE*IQ- zXN?+36JuzzT+_@m%UyigJooJ8U9D>$-98OQuK&N=0BEsUddTr@K=m|MRa>kbVeG|DD>0Y4sudISQwZoP!orU-#SMLKtJA5pdM;7e+e`R(s;Jvm;^vs zGT5-WAb5ZthZ^rYD7|gq`z!(6wpQl~?-tqPx;2G2hLTWlLK|zUdkSx7NF6?p(DRc* z)kV>=1S8RGTysj<)hHdw>Jtz2h7fEj)AKi)ur_mz^DYspP$qH_5#M8Bga_|7r|Dv) z9{N}RqKjVmSPaZSerC=HNg7p7T|lBKbJsJ_q9NFgWa{mlhTx7MsG2fGQovN`mMMY> zBx+UV&L1E=8E{&W*fOP^9(IR~SiWJCBi9c5cO-Y)VseShLv=JY_O4KT)e4W|8b!4A zH9%hd&Q}24xfh1lW#22Aw)y{sW_-goSWZ85dD-rek5`<={m)uWHg|?J#?lGXZK6IFW0D-OM6RsZRYY{pF}1 zs2W~_kE9NJ-v4>qvHV1yDyKztl7J6!-otE(@1jl(j-=fJx~_nJ*o;4a504nMDNl_S zIIlba-EnXFFAJXb$pcM)YluJg?!*=DHKrpG?St-CT_6wbk3xM0MUI#}A3Lae-v^#i zdh(4epCr5_m~RRj%(hpjw;k)0NnExeS#M5j>?$6t)nQLzxUPcBxBONTG!T#D%!AEC z7o3cwjYq7x-A*YwDq}Wzk`tA(V7g%4Ky9$gjo1Bm7GvN3_nwgC52uShyFE2Gk4mmC zMz-2@y(4XS&rVzjp*0I8OEX?sfb1WCYGs~Va!yMr>b6 zPf4n`$_j__aNtk@z9Y}yN?+qDiXu#%U51?mOe~Bo50zgQef~Z{j(uj0tdXe)JQ5u- zI)vu(e)wB*h=g*5Ne4ccs*?O^f-q2Q%0vMk@FW#0hD8ZYxduRO`A$X|&@?(M2N`Se z)qGJMo{yLbScKUYDHO|)Yd!WW+jsXQEEE=K%lS=8Yctm}+2s`iYM=`|d+!EUnQX1K zd}9`g$y4GOjeqQo3_koy-UmH{JAs&!PKzttJ4pG^s%S}qM(nwA6ZJYxh-HptnXq`3 zEuB$w8YgRk@foS>3sZH}s5iX8&BTi#>gzjCtTa2c8Sv~JK1FPz2@sveszLl`w`?W2 z`+sr^jjHJ83UPKB2-AiOgA38I!Gc>tas2@FvOjQc~qagY<923+o6KR20=(J zTw}KvL3#X>*%>Wcw~Z-Rc5@)Xnquv)Y!NxN`a8g0I(oV?YglkPAfpTx+Z7qnbzg(6 zo4XuJJZmahEpI#=XbNRm=^;Cgtoyu1l!bGrt0|kB8<&#Xi~XsK;BuF+t=Z_Rx6z7z zdVYepeEKN5s0+==?d9{`Ffu)Eyro~8pi5ZZ+&e9 zD;Vqj=WUwJN%!h;>o^-?YrxCXjT@df(_1hz@boN>J5)vCGml|Y<`ewm*a6DMdrt5B z%uG9Y>1PLVizrTx>-ubheg=G4eV*IVY88jvJ>m$vz4ykaTyL|{!Z(|4H%vL?7 zd3;>6J8@=$kj+lu3%t+#gZ7KcytH2BFc@y($|@|vT0rSrsvQg19Mc#l?2!h;+Eq(R z9LuiOOJ7xI9<2v(G~Z z8-pA}E7mc|S}e&Ig0Mg&Jv;0*`Krqyv4K;a!N#y1}bt<<75`mo|q5t*mhD+bO?y-6VUG`*%n`dS3ibx z(iX)K15x${lnaluG-TAtZ$$>*wAPbJ^jjIfn?>e)VXOl8@WP^|fpQpiz$yQa zM_Ezmk?0~WyGMf?B{}uX?-X|i2a6h&{gePD;N}Q))aj12)&oLQqV>|@?(yQCaj`y% z^}mEVjo_R;>Gb}#k~QxDBEB^kLA@STaUQ@qa=gXCv@E*Tuz;Nu5)~*OBEYouiC4(J zPg#j}1>ev4N0nhcg<%5rcH^UFC8^*D^QP&^xa~m@r&dHUwU)m^g=c#TBnGlteJ65l zV!(l)wqtgz5QQaJCe3|GnBY=-6oqy7{tK8=pzO|qf`l0+bIt%~WCg^eFU<*Hz@*d8 zeR#Ac&pEn0pNrz@4U48?;{-`P{VqH^<q-u5UOI z>%dSEg~74gBExhaf4n1RhrqPmZ(|P{1?xW#!Y(iLDv5U=(T9L&G$oGoNZ_Tuwv?3O zaB8TH5t39jm+OFN`F<$b5Tu|*2s#mA1=N4DY|rw<^<|`jBqv4c-`5D-akBWk9}8v( zho9Gy$|K+oIx)HWBRSSPm?oJmQ@s^nlP<}8Rjn{G<2Z&x3}QwIGChUGT>Ek#qg0upcp@m2OFW|Uam#m#^{mCFZ@!M17vOM&ecm}s^(V|}&RL_cU@a;>jECyCW-A?6hP zaCzMuxF^w$b!?NPzf)cE$W!F~DLQF(Gx~wnXL5m+h#HfFG`|E%V|9_?@B~5uUjMn< z6P|yH3qOHj`?wH!!34DklBx%=;QYPW7aW5tB+?U^bh_FI`#-jp1>$Z9f!e8sqEfGz znWpGEqu^veCA58sQgG(;B4C04@LFqX6 zn=*Ya8^SA9UMl^bp;zP^;YoI>m>f+8PtN&9tC=#@T6gaeyvW|~F%|5J9b-PDdmVm9 zU(}2~p!-FlE^lk~+&cFW{u`K09tGgeb&X&lD%>7&FD-z!L6w=(6E-YASVq2 zG~tTA?qz+1o6rhH5BN*nTU;F{r~{1IDD(W&iQ0bqMpquH`*GVm!L0z#wKXU2DY;ik zspBz?H)|0$pd99fcKr(NZLVGo?n0~P97EXw{NpDotq$uN3l2<-PLrUtyahtl@jm^uU>DFb#yDmKX95thDV%93G+N~-&*_-zG&(<1oaXk> z@b`m=rEwPo74FC-Vey2AGPykfBTwZH&lk#LAbX1D63m!iGSDutkj+szv}i`}7}Woj zCzoUtQ8e)!UCg} zLwb!ob-UK8zF^?&7L7!eM$z%G%_zA8uK$P98_%yZ8cFO()b#G5*<1P2f7FV{R8QqB z=!EFbA6z)5ay3=YHoyb2E#JCMsWkIp(VY~5HPJ5(WGF&|6X}1Jg_FNgH~3=pw+!PQ zf+7azq?X+khJGmM&j&Xb2>tQ!M3v7Jmwah5^sufM;Vh$)gB zFmk@~jJ6Tv+(mH+1hV*?yAL8w zzighaiEvNS^R&ckW5I4O*aKP=mcAd>a!ysc@=7?x#FJ!U6!zY# zRpR!4HJDR_Ts`MGBSOn1YH$paDRW_I!>VCtMhrm4=}qNbra6}Ggwg(<<(z<Rkg=RD`R?}sL@#$g?gv5vCE788aAG{yI9(KK-`TNx?z8mE;&Md+oOI`{$d_WZOuvO zTKucg1<9pjopDJQ{Yac!OmYGaT%#(JoXsF<;E2R+JK9l^iAE{`O*rrN zw9%J1jYm(~Q$oAkm%!=*a$6~@7su(b`LcXDo08lCF}~{=b^5H%TBy!qXHwloqLt;? zpz94jx@~>B5civuZC|=v`A=TC!b(tPo`B&Y+jJ5&G4Fkh?vW5qkZx#*%liNqVIcm> zyNOzUeSp6!i)%L9d`5;Zt3Ms(xufW0LeT*?YpP8_;;L15Un$Lm&+6MCrSkY-9R%{cD;=d~s9(1@e$5q~@3 zMpR2&2=Vucl&H}3I*)j~ZFftJnbK0J5=u~3P7>kTyTpxFv)2W?cCKO(El9R4i;D*E z2xTmNn1bZaX+kC}OO$IFE9R;j5VwpR2(auVWQ-Ue5#cRqODcxY2%OAvs`JX4++oD> z3PpbMc}4sOT2a;EyZfbm0=WnyeKfFl|1wa{ojYjAO{SVbLHBXI3e~$H2eAdUdMTX} z?5*)dq3sJ{$m%EovaH2zMN)TO+7P5KqVzy{Nc03Uf-*tAz$y39+p#9-$h<7r@tkuI;%~WC3s>%Hw2PJtX@(f;4GYqP|@d0Hvl1& zIYf)LD&)G8D4S~od~A%44 z8(tb-Fl_>FlAB_$+|knq$sbT1Fe%riE7u^W0b^y}01iu>p%Y?vIHD5CeS(l8AxAHH zFc_ENE*&UE&wF(dRddS;Rj3uil&iof1xE)AXzZ|kgc^nyRhbyqPt-Jw7G)6^11&|X zQD)$Mq)Bb+BzizFRXzdPP8nv~uqJZkKA5-1#UZ?bvpHr63 z6{IVeq!Y^+Ahkosz3e`aG{NJYl!4MbBhqv<*eJO>)&7)K5*D<4Gu~FeMZwaeEtYKr zs7e3q7^C4{YMPRvl674C&6A-DYUi; zG&HV+D7k~HvoOlny#A$nsQDy&b>cGo#41P;p{fHAv1=sU?bP$Wm$`!Y1E>ve$xZAa1&O{egdSXDC4pVqEF({Bu0omAzcX6 zvnnsfvd^`kme_v+ic98NQ?=8M_^RHS_2Ggw;iJS35t1KsdBHcHbcLv?)UlIqz8ImR zf?~sOA-?;C=)!?6*1b>&a&sCr^0Bp~)exA+v;3^ka7*Jz+Z1Oqnn!iflN&T_K& z($cm7Cr}OOHvqN1Z<~Y-&(G1}_qGE^LfD^!G_iz0-5muNt4WMOJ-{}rrswe@ujx2R z{@a#*t~SO5wq|x25%BGVf>5{2z$0aDMG%>0ftG;;f;1pFN%KHJ!l11O66t`Zl5K;1 z)QiK@_;8TO)rRI2F;Kc*A>|aFZ9}+dfKSnnk=7rfK!A1#{93fFl7C=IUTG2QW9v5) zCSRGL58XLF00Wf(&&9yRGn)c42y0FiiV z1i!EFhXQkOm+tWw>H(q{xZ$$=s)vH6I8-&HG9)EpCz$Y{jp)VmEDqFO)am2nhI5YZ zK%wnj#>u6oa$a}Bk@8eI{a$4RQ9^_NJ_K<6ma`{>PPG3iu*h0pg z?+Z3d$Ib^ii#62oU2)`oz)9(F1o1#qQpVte@5};he`i9Dy;|c&MevD_m$osO&=aiK zfs&B1qCp*Ys&ruhJBmp#hB#8<+aICReB-He>0CE?TxXr=Jf=Du>ju`25`oRZpyR0` zInHWYe4B?;y%sl6$*^;82++zqh%fpB_?-aT)-LSdNv-W`YtW~mxB$s&ID42k$gjXW zY2e^y;_Kit_1fKt{mv`S}=%Sai|>eOOpEB(4T?^ zt7fe6Of`Dd02X`t!P?N4d~AfCi+xeJC$SF3;8!cfMb@z<;le#?`xGvtS&3lKkc1ce zP;_WdpdGnDh}t`~eR4cOGGUu-6T={1-Q}BPtdAKaQE$o-5h9swAgvgeiz*yg(s_C` zL3T6*f>Y2L}>DdO@=0OIeP4Uc^Qt?3Ft*jX?y_1TZd&?a*QgYK$ zR+TtLn z{pe2k-;7Nbn#mhx0uYosi0|y+f-eyE!e<5g7L8*9Br%iyM>Rv3>ts|hnX5luHd$}b z(hG?1#~hfUI4b#Qg!G@E>v=~#J5nN_87t_7s@|H2`n%ga3u0b9=jL2f z?SpAno7sr-#gVzGF0K9wt`7UBzzU2^O%x97K|TpkY&*TCOvuw+?ifHx568@;sD}JB z?b51D4L8B~y7;@cn-cDLZ`@(Il4KCn{!+{(k`+1BI||@Oyrl}MH=45?0s0k9i7G7%#pm6% ze)Hx4NmH9a?z1v>5WG5X#ymY!JR})5UIW`|F|#Mc@N5iCdSL>xJ^c^=AHiNkedNVq zVf$k?HV)#GqM;26r2N>*?&&Qp!hjK+MGCSP1;6{f7vdGhJFg#p1zaad5i%5nV(5plEp<92B8Qz_K8piTFJZB8$Y=#o9N z+zSKp3AN^FP*`rm1W4wCKwIP?ELJh%$a^F-m74#>dKQeIf_SB~jiEF;A7p1^g82pk znHNZLh0UeL?~Pr#gMm{A@ayWD6W#9LcQ_O2ZyPQXbX850PhMW*ZjHPrUb{FDW{i zMY&r2VwV$w*y98-pF&Jf>6r6|B74xG=*NDy+GC~azqD;^sQtJNXhdVqUdUpqg!IS=(R^^6gG+Uf6$2!H~sNN?*80UB> z)19;^>j`-b-sFeh=NnZ7C%N+iG|X@m59VAX3y2o^&pXTFQc(jm*qZG^gI?7Ub9q$Q zPLmH7iwVU#7xuyj!5CDD05jJ4Yu#Uho)Q1h?BXCW8*H9v-Sh`zHK6A!R-}4I(n3^bmNhfDLbt*3x7aroIypHWn6?42FEcTKGU&Bvu1FF67OSpB;z2CB*UQ^@M6I7V2o*0*IhQsqptnY znzgx#hZ~sB_1dLqc{mn{)PeCz()2cWo1r1k62DqHG1!n~8|x9+0a!w92&r*@K{lBX z@=}>i(nvac3;21rJ3Fm4N&~H%;_g&AUw#R-_#yppP|g~pc}F*jlH zO4YZdTby!(mH)F;q46O)fg~VV4dl9QskM7_k*PblJm?iCeBmo|S$kboYDRp%D~Sh} zElLXw(MHvWYJyM(weWh7*iwBo{5;l=C*AA`A?Ar>Y*J_`29^Px!JO!+$+G)G5 zzPFru+oPO;%sIgWPBbg}s+jN{)kvoSDVc_!Jv#HI8wh{(HBC;O-7l?Pz9j5p{GdRKogZQ5+w#r)&;L*QpXwEIf4eq`n5G$QRMhVYIGmtbh|bLS<#{ymWDs>z zP5^&8x94s{(f(too&y$Ir;&w=CXxCQ)A$}C6jx`#1uWFKXU!zEpPkbDf~xazBLzh% zM@Cn)XL?2_LT1zexD5w%SkN?+HY-rkG1=sr;V`zkP@NTxcmy)e6LEs&k>ez-*8=7C zzFxB6#shQX{-0zyDl`OrdF28W6eI&ww>>l51{-lW|FYIK68kv^RgY!&_LudvXEX=2 z$Gso@+ULbE9W^)pz0as^vme)5|6^-4k1jTYKP??p=o=`rn4ZZM-ns8I?&RBt4F#BV|4S?zibP6I`q1rJ z`}u+>aR2&xOuqP2JlTLZ+r%QLyNP|f?Qx9w`|hg5_}J*?Y8kB8yy83WiAw@eAT#^` zs~u35vb`&er>$K0XRNB+!JA}uJLlF&X!y#EEIk(XXXFQHB1Q}6OgubxoUBR-rc@sC z$)P*f3LAl+nHx?nZ2|5LTSl1KY6rbKKBHX*5II*b5O2JI`X$mEI3y8~PGFg&hRg60 zvFaF0o2rw;=X`0h==8NqcZaOzywR+lcL}@q2eax46|jEZyY_sccToZ$+Nj0u=s^&3 zE5aPt&0af?+E5Xp5#y>xm<3H4q3tZijAs{Qovh71pH|U@@qUINHIERuVY*2_Nuxz; z*Tf*;yeuYz0_@-wuaKTsQ%aTF_W&Z}Wxuew(F*b1*_Dej(Uzi^bh zmzog&K7X*%w`0lc(9M}DYl{ZE^OKlFA)JAy2I7OT;^;&tfz84EK-mdL1LOf%^eJaT z1p1WQP~Ors4>R1>^Z?SXG~$ksHw(g!fvwDA*F;hqboIduCqso74$=@o=r@-!qD`Zu z;D~`n&c{CeA42BlXrWPIgpmZGT_<`ASAl(Q)c^Dbo4Oqre69x&KU;;j10?pSBp|31 z?ubM;O|R-=)$;-YiaGe+wmlz-^Gw|{#6kUr#ZzHD1~$^!39zh8cI>hpHKLwMigsSrD{}#+Ze5UU zsjk-SVuJL{P%0kctfFzIwJ&XGcCb>e zCvrs1W4lDO7&eBa7T}IkSLBytcr$N*u>0=KRAC1bI`9yJ^bc<)r`UiQW0qYF4?2(% z+ApD%MD;$Zhpw@MRb0NjqtM;BD6s%`*um#AqdYX2Ny+1=^s?NdZ~3>FyGgSQxj^Vz z&??2L(3MhQY&;XUz(N+yLW-Oj)@vqW;XzQ7bP#jqzLsUGLJO)Gpa=L#aSte3}2kM;vtGJhMczKrzX|nUWty8wCEJh zv9Gei>DYm-m_pA*sYrt-WLr1w*MJ0_VQupgMwwB|5^$t=@o7f2Nt|ZXMv7Jbv!!#c zD!}M~76x+D49}S>n7V}(VONv?@h}hI)>qNQ&z2T}RB=ZgLPNd4gHI%9MyJ*D_r!Gy z7kISo@wj73#_)DeXZW}gMPJ1}FATX$R>-p!LQZuGwreR&!Z0)C z6cC?1AY*mWB~z?vMFFu}k?#Z4@)WWzp8}>`_Z*Y3A*L2Y!UqKD0p{>1@dh-!1aWgH^Oem%q|^{5b~Hkel$9WL57-_46Q! zv9dirRvS?O(X3wCF0&!4Q!aEDnO{j-=%VN!O(RRA}(Uz!#hB_O2d5vBl36BV+<5K>ce0uq__@orPJ1dsq=a3-&6 zMFb@&%bB!7ma%O9YzLpx!Ud9uP+Q6a@pYskF(k6lLHq}dtXia9BSw(1VK}#3F8Cuu zgZpkuSJs9R#R@l+;;Mo@Mom-UmgH1cQ{62llg zkcJp|Wa(@n&(DDLXG2uiz9ir_A;#i@4#Zf)w>@yY9=ZdfU8gYqW_Q)A^Xa5}pwf^C zrUaxn-_VeYri>7xGoO9E6&+81w8T8i(X91ZcQwv}8IP41Iu#>3 zML=FBCqV_V=}KEK^iiz#^BD& zO=Rxn$X@WUpBOg-vlesbko~aDRZYU6bKTWRq^}Bq_~p^X#qYP4Qv+R`C5ri1avXso zAOdCU&XEYE(*ghH*`|_ZQg``(W-~{|;FqO269}2*1N#+lry77_HEhG{&R5bGFAqQ7 zMh364$B+-*R_er#ZwUUpk-`A^QYAz|Z-M4hf9Jmc^so@0Z3Luc+Tmh3* z*#$P-ADQ?R8sy8;F#X=CqVF|^fvYHEW%B2u!;eIxh~eJDdb|@PrR~SyUYaI-^w*uz z_c=q?d^`_FiMfEs=RcVE{uHS9-p8i)uvciL*!4qiu!GO15Vm*iCI7-}czrLTG*52K zRoOx=KU**scHZ(M^q3=@hQy?Ubbr+wn=ox?H}@V90*^VMT{bY3zOH9)TXvf2(9 zcTFaJU%=7yBR=vNpl8MzplpvsK__I+*YakGoz5oQ5-GxA12_}w1W1>pB}}9+r%Y4! z(>}wSBB{YMKhcuf16x8%XM#40kOS1z;Al`*@3@3+^j$k$vK3qG%-&WY8_x}u6-8EH~w<6!@3DkboTQi?hPd^YuvPJ*a`Y%JF})l;UPy5s;1 m)0=J zV0p1(=A!$Zg*5aT|luMa2Sv^JvAq6%QXbvqgff{k(aH8RQ@ZMl% zsDSq4<{8ttQjOu=VR^A=Q3g|x`mN=%5TXLJa;{X2#GbK#(qyx>mxh-X#0BU@f$UaL zR3D+AH^IDkvKt`Agsc{p}Z$T6(0ZRGxN@ zbRC_OrQslO8{{+$R91{2yA`S&fJK75$^zW6MONoN7;~5rEQ_bq z_SE~tcvjetYaOn-ia?O>(4lI6t13F@u-sXq5_$oFUqzBMq>d-a0J;(@pNGhj6j3;m zCpHrT2luj=J|)M1p>$309j|oug1oLEHs&Ed%c8%yh)g6?#^e;r8e`NDmWmIfS8k?MZ; zc6>CLKZuiAdxP#YK(>RYXmp^u#OMKmJiY=Vx!;t=6n1wv2KS_rf6dlrRkqLQ@(jdp zN_eA|unhAU;qgAeM6oGhKBo5TI@X`dB-&L6|^si56d+L%$sD)G`<;d+A z=__%j@Hj6N>?1gP{*qs$V3og&JcH2>)NQtNgkwHSo4sVB*1YW_q5q)Ze(E<^CZ)3j zL^4jwvjf62X^lyF+wDTibq-@?na2UEBKwC!+B1i;ut1ED+0s;qWKVdpM?#8rNK3(G zevW{F3hrkN!Y_GXAP>Qc%qh4F^hS*(5fR6%n}@dd!H`L)b&YXqPTnSXCu$b+VCG;o zD+r5WzI5?!$tCjKI9>qg0Y;GJOGnz>^+WjCi|cFx?Dn>QDJ;?#NW$Ro$~!RcY6eV< z+lI!^(|1-lfNMAqNoR0nwt74;K|+el0!DyaO;i52s}54c`G%OqE~sFyMATq+A_oGQ zban}EU@7V|sD>TwTo?B-m5`c3^A zSUjrVnzUlYI`?gv>3rPAx>;^;!r)Bv}^FODQdG0uhdBj$6!$ym`$g4UCDcQg6>yJ#@?m zYR-@hQadJFdPsyKg?ZFBgx+)*rkyv+WT~i`s2Q?XPUX9I4;AWnxT$cqo(amtIdYv<}aofT^K&=r1CzE1kih~-0zTY2mG0i?2UmOJqOOocmx zlI(dA5kX)=@_32{g*w$&e15DiZXd$aO`rIyiJCs+%+O0?JVMI_YmS1GKL3k$Q?dJ1 zO|b0F3Sth>*R#7qJyK?8XU>EIdrmCi>(z_9Y6ZTfJi02sd5U=jXQkfLI=cL{`ZxIF zJnQ!h!evp4lSfE7b;zugV$TtGf~4AzMlqmKK-TLh=ut~Z^Mu@-yt;*!4Twa(B2u#> z-d76{Sp6C%=-?`eOS)=hZ9>cat#Ripq>h60`7;yk3ngsx@j)3;CSjs>ry|yIr(khb zTWf%>E&O}hro!+yN<8^CqTO~E${rZ1<|D0SAYut^Fb~U)O#CQP3h#}v>I%LGtkazS zEe??vybL+eil$v;1%Za2zKRQlVZ;%ZNm;M}s5n zQ^gd535VgUAWE1HF(K;)#$~#cAYzp&PG0ZolvSi0N6Dxn%cDTP;a9`h&nSnU#)dr0hp5H1$(l$usKqRCs-o#hYBslp#Y2^POz?3Ti{3B^=fx<5Z5 z2>|W>`*?}$%CV{;8?;927m(;!fapxP!+n>`8dz5%VQs zF5C>6zW1@k9u

B6*k#?)mp<8j7sVHPyQ#HYDIQQPrvq_TG<)I6!6Rj>Uf=2C#2O z#pt-YVCzlY8wL>%fv0hZs!QN7(ZidU7xA1Chi)vUA{FreD4LfQI&{i^7agE6;Q0#T zE6a;OPzLt9h)cF{dVexh- zJ4D6@qaDxx55Tf7jzaV2G2OXAtv*R8xk*`?O#<1 zUOPSsZ+AK{*zsEsUR(8KLQrLS*9?gzLL`5FeLF_w(X+C5!qP;3MAlO)@RV9Xw&JHp zzMg$`RItI_M^eTK8A7raD)dHO1_-t!i<)zyk?`I1>h9i=&*2)CJuzN#Lb3A%Ul~)k zM6J*2M_+x_twBiiOJ!Lr`;c&7nYFWz?4ed=XvwaT>Tc$-BzkBnL~_F~mihiuiZ*d) z73?E}joFgYggZwcvc&VM@u^r|!GDk!p_R%r;_74#QVOoA)-B(RNP(iCQjX#r1tkPR zK^r0Wb|A4zp%q!9zm=i@A3*E7#}Pz(8BK!b287x%4ZrU#xv}2{EY>t7;FFJi zQ5b1)Q6*vX(2^KOGI|$r05~Uiq0Mux6`7GEB6Oxg*kP2ff5irZgC{Cv^?yuR|ARrM zWNfOXy&~oDtihNG_k+|7=v)exD3{HF0f$|srF z9|OI`A`gNnBKh$lYVCp)5MqJ7J)QDS!*@^ID<$)jv@t94qMV{k62-7E-p;P( zjjA(wh8uruJ8tQ7#QQav!^(Oe#?BE`>CbULv-Z(|Pj^#NBX4ro@} z)MeZeI-l)XaT*oTR=RTJFHxccc^K0$7kQBoH)BjuZDrxkuW+L(y9C$9(`Hyvv56@BB%3_w&SY z`NN-dIl;WS(n>D7b)fh_f7cctqevi!35o|ej@Ly1_+`ZJ3jj$=^u^zwN=+px?(TXj zDf8|42{&Vt;5Io%gm$IvU9+J0l}P~L;bLP)gF1RzDdfryIRv%@SZ8eW798fAtmP|oSIT#Jc!r@fP$dQldY!SQA#n@;DNg&dhYEAVL5Cg7 z!$OTSzzz^SkO+{FbsX$=P%6r0P=EI|BoM6}AP?RL#(_;45v9IsSrC#)$*Do;>3*~9 zg#SxYv!4>4LMsmrM&qt8N$s5XYxAl6j&d$AB=A%+|K}lMW**qGqq)ib>E9*0MUWV4 z3J9y-PqJvqZ#?v|v)toS*@j0A~LUTFO2f{mcAw^+;Un zZd{EStRCU)mD_+J-WrSh%zM}N@M@>6emJ{z7nBPO?=x>yhW+S?_E705^t!3r5MD$+ zXGJ7A5-OpK@0MRw@~7Q>sUVH`~rB1-TV_UmiQ z9!$EvMcjcNFT%v}>X6(7Wgh~^2yf!9*iy95&#rqcLVBq_NajYy@*5e+_x+rsUeDE0 zA-^Hq^IyBXSJ4q`^-a{FuHDBH@h3D?JEV71!sjo;P9N_4K`;*N3*8$sW(b=gKcO^1 zKW@XKtn`(S^=cR!l^CWh^#dk1ViW!_lz8n3H(NF4gPa!Thgci#>b=^NB8t<8F|O{L zT_8W#5L(5#txgByZ}c)q@Q8>cMDmzECFz;^RrKIpC1p?1_+gv$K>WwuCsU3WYw@eo z@Oj4!%UM|MwQwml6=_G@nkNVAQLK z%F(=jGd;>bMtnm=GJXQGqg5F+t>>0*@&SaG|M8t}-qEh<=^A{`g8^Cy=qVSqJ*b1} zoUdiSNs{O=@11xiLLTO?YZ$uiz5ahCvncUxm>YYLLue$iWLqeI&e?kSYdewJ!ps!b zr5jtGJ)v7MG3>_F`&5KSsz@j@C8^rER15gV#klVAI0nzT%?41*<@JDfuwCelEvpv_ z=_&wIAD~${zxq&qv=J_n`{C67)rNDNnrfkQO*0Usq7w?o)18>Xc%km@G#S8)(sWvY zgLvy>`14fx^l`vL(~9KJT%{()4rA=a6NLV=cB0BUxr`7Ct$fH_T8 z{=D3$B90QWV`;;p^Ya?TxaOgGKZI=H0I;l+#kT-vTWo4i*%DWs!IRE?tdVl|GKAbIX6ZX`UrPVYYRHyvNKFnzu6f(N%W}L!f(M}Q)9D5+g>>F~REX4PQr3p$+$im}m~o%J#{4bbb_x`f1-?Vc_a z7n}a-sEdPC5e`Vw}A%&$P zu$PVI^Z20KC>GCRAPP`n(%Uk_g~5nDvzlTmUtfk_TZjsx+^f4rb&7i%8XT3wu*YV5 zEvSlLf;fHox`Cb)0B**G$;r|LDY?xeCN{3g_)#f;KF^B_3GoIRae|~1ozKjIz4F@i zolm^kugQKFt$2Lr!;g;Vl*J;4^=HKN($0YM&-vt`6`OmE9MJZ6fzUJp^Y#ZO1j_yk zpnIo&MT8KFD|9ObbC&D^Np7hRF&we~!sZ+B4|5Jt0CSQI za4TJL2`V7_14bi0FhH`7q-`{R*58ZQ{U$~xP5~j)2sr>YepWATk9cXo9b~9Rc416w zzj?4{;U9x+@K(kLmLVUYk~d{~9&up*in&}2{eUUi+*i`@i_pbx(J;X*~m7&v^d2EOfS1`DrEifbhlfhA+8c>u#>tQ8PNN)!tVffV}(g z>|1fmPl({6e2{<0S1FSqj?F?Y!QkGhY$)|l@62LiGD1cEgilW<#VKp>N@0Cf7h3$MUqFj}Il0{p&LbJ1dl#K#`^e)~y&PUUC zs%@JfBv>10@16zqJ$puC@pdXlg>r)PVw<~}Wh_e7UCj5>70&+)j=+z>_m?3qg`CmN zB}k?gsJ;q>FMsuMz}&ZRnFy9#Y-fX7}uz$s-8EnaJ;icq0IR z)zpUhtUj&SxREDsbGg7gD8(-*u9_o0MvL5?v+nGaW()acL}2C5XHFnrneBwod|4Ea zw}fSIft}k2;%4DJaGt}J!1i&RuMXFDm7{!jD}qbb7a5s5ob4gxQ+9mvO-S>{2tW30 z!Vgnf<%>t}lDXX>jFKj=DGl7o#auFpgYaBscNp7cBr!RZgIAATSf!V;buo)GIurO_ zwe8(XqIL8{LJNw1L9N7Y9Fw<4?MA$)cQ{t|(Jtk*SrjS$n62kAgy6I%XCn73d06Pu za*>lKHD9r1hT03of3CZJ@#Eh=@LsLCdx`c;JH@8!XS@~cPDl}gN}X93L@xXIN=bOc z;bbSQtn&v==2CXWFu_on>-Z9kfS5wyKwxne2sSZ+bsqbQ!Rv&pzFtZnCe}RF=zF5j zqQvZApVm7>j;l?#jmzwwli%WI-=kcTik z+<4(DyZng;$8rZJqJH;Gk%*Yh5ylRiJQEn90gwd<4td#yWrEPmgH$qSj0OnH%YURI zQ}`{vj+agGcdN)rC;QICYSdIJ6*}<`q4WOUqGGghRsf!h*HSWnmVl{m%S=h9q}V2P zXu2pKNXzo&of*-0?%AKYr)2{B%PIDi26>Gcu90GqIA(4jn>v);XsrSE;cy*DMUz>gZy>U@~^XWw4m zqG7d|JyT`$hs9Ft3SSEW^Mq->cem)mCcAc=9ZhFL<{(#JGu7&Ad{oNu)B!cD3&sfh z3I|dqW+KuhqNllFLe__U9%a-nOIyO8^bhpCee8cZfeEY_JJH!C?*n=1|EV0=9HaCQ zfkL?O154@qeO>8ha%h=jinUqT7F+O%BgXfLirrTtl{QH-wuvaDR_MfqR~7qFN#)T& zWafzKe&7HZG(~WGdH`<0rPzU>xb#Fv!Rw6~!x&3^{qHP@}*PT$3vsB|?OIw?grN;U|%#y%R zetE7&LK~5zxks-Wm)yo3wU$}6KN9N`Fq=C7s3^3Z@cbj5w9kB+HeGl6InqA$XQ>#E zhS~BM#N+@RFqAoJBCq)_X)>;%WBuO`UZ)$(*TL(Wo(24jW%sn4i{B>}AH+%b$kg?j zOrwCsn2GOLO%C8|hR$+(Alb}GSviNBosg2%>LBkWK;buJ6 z<7^K#$U62JMka2Ut`f_~NE^C%NE)l161)(GZnK7XWgPecqk|nN->@+)%`m{W#)*Y)w?ZKia&Dw~)Z;XvAfI~)3!Lb#k>6>K@{XxR?9_%3 zzv)GGKKo*HPg&LaM8bei1Qw;ctm{Aif`_98k5p#_&n%0$l^_3wo$iRMze@V+AZuU?QCsC}sRfWb zh5gP(6r!-mf)~0mp`AyJNm`04bVRWUX^Jikhq$Hy*p+j8((iaYboGYe7a<7BckvI&7{c1g z-CFQR4dj~#3klm24MtSSjauH#jSbFo7QlU1>EF(K0MWVhCg$K8LA$#3NVq;hL?Q2a z7=kyo96kz>%8L$gyZe&aPzSrhOcAF#P2Mx|?J$i-Vn8$bVKK@6uA7Du8&d!zWX}um zBpZI+U8;u3ab#HaFRt9Y0VsEPGnI z`JvP@5v<@;=k6!SoLb(?mWcsP6FHhxIoN@*A;l&YSZmAaeF1y(UQ}=u)SivOB&L5> z>|E_PRqBOK#nNP4D%&3pdNP#LQ;5g+Xa8r$h3=(4f5tsa+w^* zykGvVq&~h0pH;-(3a-=0P{w{B28OMXyGFkQdd;Ds-~3vKPcV#{BGVp-^E-u@npvt3 zGNbZ7;i|2O1}O_jy5dCs2VuW~j6G1HZXL&869M0lqQT(V>ZvVN11-4v0*NTBKXR%S z&L90xP)g}gL7#!BcQC5mWca`F53D=<${8{bGW_XMqar$FzB!=aE-OYRa?T2Sw~PH6 z3j_lJ9Qt!<5In$R+r;{)A1JFzc&J*FsKhF&$REnZ($4M{NU?8ov4RK2Z=)u9M4*Xb zsD^#Pi0!T914=j?WyVI7z9V4x$;;kAT2V_Au7-JW!(Tn3zqIxh3tA%4op=Qln6}nb znz;kNvoSBup=nb=ImjDWik{JUibv_Uwc#47n;J0>HZZ^q=IR#>)j20-!&VbCg;qJ5 z^5_mwaD9oG0Q)HOPg@&O13B_CQi0VpWKinUhX7g$YT5SY9QtOEIh6FFDw4jrqwz{~*M%%mG*HD`nZYM$^Q|(uVEkku-BGrloW-9Rs z*5a{i0a{N18~bY*C#$Ql01Qp25zen&T_6L*fbb7z-kh9CVf*Q+_@QAG`UV?-GT7Dx zxa>7PX0FoX@=|Joz&-oN%)jZOAjLrnx|Ong2t${(Qa7>#O^ctht&hO1Lj{>D?;k1= zy~2y1`V#7n!wjE(qS?EFc~udGXcP4g91!Q;57Y{t@A566Ddtl2tL`Qx7!wlK#T|Zd zciUxVXO72hH{AJIN0jBAN@+L_Q@2rdgSpfVd9H!^idsU=@fr9=fp@k-eE~8zYFLZ` zy}bE4w3!!`8HwlE+FhPuVg45LU+fMB@QP3U8&z*`^;$%el5-^KJ`}bDLsSxe(c2YV zPX2A!c3mBiX|2w&soNvo(P!AUC-W(7z9-O^#gZy<12x*LfGL86eLbO*vDm)7^}Ua(8iQ1h^B0E?Q))>~Cu-{Di^Fj@{Pml)*eE6H+ae2lp$yg3A2=eAh9~WO7fd z-#VUPK5ymw5`jIF9S#{DC!#BTB@9U!BA7?S$Fv&;wDQ@&wx0OyH%yC z`>fyTVnE85bL8TEs!J`&SfPM1i|%6k9Um86c`he-9zIA4tH7dNK@A2hi8GX^kPIe; z>=dJ`*;(!}bMcwmR74Xx^Zan&L{m^>-nx2llM(y>i8VBYw|CIkIXhHke#o;`4t7JK zd~o|N{c@Zg2nEqJ*dzdvW~0xk{C#Szty z0?%itn3y^NzM$war=+ALMvSX*6#ly9G+d=KiGg|?UsfM1YBHw6c~y<9JYeB>8qG~I zNM`y(LGc!cMIhJ7pCK_~8^m{qEc1l1hGcv|A)?xA4%eehLXqu-S?3TcpRbJNM)rE zgL39-a`b9P;L7tTlcDwLPKi%9`0)L_Y+#VgBoH^i!i^G}Uzfc_GiF6}Ac(sc{XX5R zUN#1CoK_;ix(&F3$Wqb}p020))xyAJAM?VcDb*l9Zg2n){;At1ix{3`iX1{Pad3}T0Q4oy%Movc~O-Z*+NAq&1 zkZ7#TV4cl!CfOXgrVIzBUa}_Lw@z!PY)gr?Qpthsu68nvN9PH@R>RQaC@xG3;Q030 zY@4Tzu8simO2xn3%cccoG$JvCuo+;ow~5h+zF`>RrhUJ~$iwfqnbTGR2E3&6k`ZPd zgEvrVDfZgrS)qC85DydMO6Ds~`GWVNW$+}jFfnyjLJ+ox>LKAGmj-#g;H?&MS9(Jv zQ=p5c>UpJLQbeZS?%|V33ue}cj)Wg9N9FDQEm7+yiSTx@sV+tIn8W`gOT?0m2xU{b zKI<C zhaVZ8Ik|vF&q?ns1S;m_fAbDEIoB)onTaizekvJ7Uz8piE}gl|N1@$L2ukD08N;YV zy5ENqoxu@m4FWbDQd|q9Ss+>4{p~s?Iw&&J`ARl78{-nzh4UsUj;AmQlx*k0^_6?yi8yP3Zp*!WI(C;f6rF9f%VAVQ!lbmxfgAT9a@=tNm3q zi!qW>@4eiheYBj_WUTcY2Xq-s>Fsy>^Dcr`m>4`y;ipB`EeIPIIsDsFDd*yn^@dfn}^093r(*1 zr-<}Lu%&4PcU~{=R}@Dmu~^5U>kJY|u_qWenu^kh_f}p^X~)R~zrij()3KsUH44m@ ztGnY3;XiT7M^Z}#`%d6B74V_a5ae|*)?ncmPo2Q%5qon=&jvc)%cAt%PBdH#9XCg_ zxTd?;<%;^3Tm*X;ZCkK_cditi*$n6H%`)zfO_x(3644ARSKj_eN}$@I(gHF^*}Zc= z5XDhLmg2f}Uy{81U=n*&e!qY8xQgJLffw5bnrGogL^)+;UTOT7qw8t)eAVl4V6K14 zX4g23>L5qbGY-(KH;P0#NkW5iv6pS%^G$vGY^CLau2h#+U*&5Ai%NBx=#CM%TwJ{{ z*NQK>6Nz1%5e?0Sz`iWz!UsXlo{%y&NLa*}KWNN_;$5cokRF+do6=Y;N z^EY4Y)Z~HEZ_(1BGhRkPj8G66moKHrk&P#vt{m?d|EsBDR>TJoN_}#Bn-d~w~CbxR$=RCVozajo+SpX`ml$0*{b*og#n|a@?=u!_58P4!{JubcjqT|HiOkEZaZrRIXqpp$XY}Q*3CFj_T$w%9PaR zTB#~p4gts8%XLyv(vC-rzaJX#o|F!Pdos$C3-``wmD{AEuruJ9Dgk|C)h!}X%#Ee} z*4@Cq9uFRaz)*m|I+*m;Qd|j|xDR``!?wzHH957&^92;9Y}$F$&$E%qN4`arA$*N= zvX=_z6U(y(HOB=`ePZwe0sR$ojA8?tA2qTMJW8FWSx3KSl0IVuMQuH($mU;h%iI@; z`MFJqavAiUlm9q9X@6STqrjnJak7OjP9QKz@pw>!9dsP#X|1yI3<{*#Do(Ks&BG)z zZ;$FSUBQJbEVP%ik_?MlANr{v71qfB0#!ton#+XRrh%Uv*$7A!RHge^7fc_h76xum z1v+10g=z7jDCj(@OK6%7$pFEQBblHD-u#H~+NFuRW&S~MwK2&ElU9+7gh@6=e(r

B6*k#?)mp<8j7sVHPyQ#HYDIQQPrvq_TG<)I6!6Rj>Uf=2C#2O z#pt-YVCzlY8wL>%fv0hZs!QN7(ZidU7xA1Chi)vUA{FreD4LfQI&{i^7agE6;Q0#T zE6a;OPzLt9h)cF{dVexh- zJ4D6@qaDxx55Tf7jzaV2G2OXAtv*R8xk*`?O#<1 zUOPSsZ+AK{*zsEsUR(8KLQrLS*9?gzLL`5FeLF_w(X+C5!qP;3MAlO)@RV9Xw&JHp zzMg$`RItI_M^eTK8A7raD)dHO1_-t!i<)zyk?`I1>h9i=&*2)CJuzN#Lb3A%Ul~)k zM6J*2M_+x_twBiiOJ!Lr`;c&7nYFWz?4ed=XvwaT>Tc$-BzkBnL~_F~mihiuiZ*d) z73?E}joFgYggZwcvc&VM@u^r|!GDk!p_R%r;_74#QVOoA)-B(RNP(iCQjX#r1tkPR zK^r0Wb|A4zp%q!9zm=i@A3*E7#}Pz(8BK!b287x%4ZrU#xv}2{EY>t7;FFJi zQ5b1)Q6*vX(2^KOGI|$r05~Uiq0Mux6`7GEB6Oxg*kP2ff5irZgC{Cv^?yuR|ARrM zWNfOXy&~oDtihNG_k+|7=v)exD3{HF0f$|srF z9|OI`A`gNnBKh$lYVCp)5MqJ7J)QDS!*@^ID<$)jv@t94qMV{k62-7E-p;P( zjjA(wh8uruJ8tQ7#QQav!^(Oe#?BE`>CbULv-Z(|Pj^#NBX4ro@} z)MeZeI-l)XaT*oTR=RTJFHxccc^K0$7kQBoH)BjuZDrxkuW+L(y9C$9(`Hyvv56@BB%3_w&SY z`NN-dIl;WS(n>D7b)fh_f7cctqevi!35o|ej@Ly1_+`ZJ3jj$=^u^zwN=+px?(TXj zDf8|42{&Vt;5Io%gm$IvU9+J0l}P~L;bLP)gF1RzDdfryIRv%@SZ8eW798fAtmP|oSIT#Jc!r@fP$dQldY!SQA#n@;DNg&dhYEAVL5Cg7 z!$OTSzzz^SkO+{FbsX$=P%6r0P=EI|BoM6}AP?RL#(_;45v9IsSrC#)$*Do;>3*~9 zg#SxYv!4>4LMsmrM&qt8N$s5XYxAl6j&d$AB=A%+|K}lMW**qGqq)ib>E9*0MUWV4 z3J9y-PqJvqZ#?v|v)toS*@j0A~LUTFO2f{mcAw^+;Un zZd{EStRCU)mD_+J-WrSh%zM}N@M@>6emJ{z7nBPO?=x>yhW+S?_E705^t!3r5MD$+ zXGJ7A5-OpK@0MRw@~7Q>sUVH`~rB1-TV_UmiQ z9!$EvMcjcNFT%v}>X6(7Wgh~^2yf!9*iy95&#rqcLVBq_NajYy@*5e+_x+rsUeDE0 zA-^Hq^IyBXSJ4q`^-a{FuHDBH@h3D?JEV71!sjo;P9N_4K`;*N3*8$sW(b=gKcO^1 zKW@XKtn`(S^=cR!l^CWh^#dk1ViW!_lz8n3H(NF4gPa!Thgci#>b=^NB8t<8F|O{L zT_8W#5L(5#txgByZ}c)q@Q8>cMDmzECFz;^RrKIpC1p?1_+gv$K>WwuCsU3WYw@eo z@Oj4!%UM|MwQwml6=_G@nkNVAQLK z%F(=jGd;>bMtnm=GJXQGqg5F+t>>0*@&SaG|M8t}-qEh<=^A{`g8^Cy=qVSqJ*b1} zoUdiSNs{O=@11xiLLTO?YZ$uiz5ahCvncUxm>YYLLue$iWLqeI&e?kSYdewJ!ps!b zr5jtGJ)v7MG3>_F`&5KSsz@j@C8^rER15gV#klVAI0nzT%?41*<@JDfuwCelEvpv_ z=_&wIAD~${zxq&qv=J_n`{C67)rNDNnrfkQO*0Usq7w?o)18>Xc%km@G#S8)(sWvY zgLvy>`14fx^l`vL(~9KJT%{()4rA=a6NLV=cB0BUxr`7Ct$fH_T8 z{=D3$B90QWV`;;p^Ya?TxaOgGKZI=H0I;l+#kT-vTWo4i*%DWs!IRE?tdVl|GKAbIX6ZX`UrPVYYRHyvNKFnzu6f(N%W}L!f(M}Q)9D5+g>>F~REX4PQr3p$+$im}m~o%J#{4bbb_x`f1-?Vc_a z7n}a-sEdPC5e`Vw}A%&$P zu$PVI^Z20KC>GCRAPP`n(%Uk_g~5nDvzlTmUtfk_TZjsx+^f4rb&7i%8XT3wu*YV5 zEvSlLf;fHox`Cb)0B**G$;r|LDY?xeCN{3g_)#f;KF^B_3GoIRae|~1ozKjIz4F@i zolm^kugQKFt$2Lr!;g;Vl*J;4^=HKN($0YM&-vt`6`OmE9MJZ6fzUJp^Y#ZO1j_yk zpnIo&MT8KFD|9ObbC&D^Np7hRF&we~!sZ+B4|5Jt0CSQI za4TJL2`V7_14bi0FhH`7q-`{R*58ZQ{U$~xP5~j)2sr>YepWATk9cXo9b~9Rc416w zzj?4{;U9x+@K(kLmLVUYk~d{~9&up*in&}2{eUUi+*i`@i_pbx(J;X*~m7&v^d2EOfS1`DrEifbhlfhA+8c>u#>tQ8PNN)!tVffV}(g z>|1fmPl({6e2{<0S1FSqj?F?Y!QkGhY$)|l@62LiGD1cEgilW<#VKp>N@0Cf7h3$MUqFj}Il0{p&LbJ1dl#K#`^e)~y&PUUC zs%@JfBv>10@16zqJ$puC@pdXlg>r)PVw<~}Wh_e7UCj5>70&+)j=+z>_m?3qg`CmN zB}k?gsJ;q>FMsuMz}&ZRnFy9#Y-fX7}uz$s-8EnaJ;icq0IR z)zpUhtUj&SxREDsbGg7gD8(-*u9_o0MvL5?v+nGaW()acL}2C5XHFnrneBwod|4Ea zw}fSIft}k2;%4DJaGt}J!1i&RuMXFDm7{!jD}qbb7a5s5ob4gxQ+9mvO-S>{2tW30 z!Vgnf<%>t}lDXX>jFKj=DGl7o#auFpgYaBscNp7cBr!RZgIAATSf!V;buo)GIurO_ zwe8(XqIL8{LJNw1L9N7Y9Fw<4?MA$)cQ{t|(Jtk*SrjS$n62kAgy6I%XCn73d06Pu za*>lKHD9r1hT03of3CZJ@#Eh=@LsLCdx`c;JH@8!XS@~cPDl}gN}X93L@xXIN=bOc z;bbSQtn&v==2CXWFu_on>-Z9kfS5wyKwxne2sSZ+bsqbQ!Rv&pzFtZnCe}RF=zF5j zqQvZApVm7>j;l?#jmzwwli%WI-=kcTik z+<4(DyZng;$8rZJqJH;Gk%*Yh5ylRiJQEn90gwd<4td#yWrEPmgH$qSj0OnH%YURI zQ}`{vj+agGcdN)rC;QICYSdIJ6*}<`q4WOUqGGghRsf!h*HSWnmVl{m%S=h9q}V2P zXu2pKNXzo&of*-0?%AKYr)2{B%PIDi26>Gcu90GqIA(4jn>v);XsrSE;cy*DMUz>gZy>U@~^XWw4m zqG7d|JyT`$hs9Ft3SSEW^Mq->cem)mCcAc=9ZhFL<{(#JGu7&Ad{oNu)B!cD3&sfh z3I|dqW+KuhqNllFLe__U9%a-nOIyO8^bhpCee8cZfeEY_JJH!C?*n=1|EV0=9HaCQ zfkL?O154@qeO>8ha%h=jinUqT7F+O%BgXfLirrTtl{QH-wuvaDR_MfqR~7qFN#)T& zWafzKe&7HZG(~WGdH`<0rPzU>xb#Fv!Rw6~!x&3^{qHP@}*PT$3vsB|?OIw?grN;U|%#y%R zetE7&LK~5zxks-Wm)yo3wU$}6KN9N`Fq=C7s3^3Z@cbj5w9kB+HeGl6InqA$XQ>#E zhS~BM#N+@RFqAoJBCq)_X)>;%WBuO`UZ)$(*TL(Wo(24jW%sn4i{B>}AH+%b$kg?j zOrwCsn2GOLO%C8|hR$+(Alb}GSviNBosg2%>LBkWK;buJ6 z<7^K#$U62JMka2Ut`f_~NE^C%NE)l161)(GZnK7XWgPecqk|nN->@+)%`m{W#)*Y)w?ZKia&Dw~)Z;XvAfI~)3!Lb#k>6>K@{XxR?9_%3 zzv)GGKKo*HPg&LaM8bei1Qw;ctm{Aif`_98k5p#_&n%0$l^_3wo$iRMze@V+AZuU?QCsC}sRfWb zh5gP(6r!-mf)~0mp`AyJNm`04bVRWUX^Jikhq$Hy*p+j8((iaYboGYe7a<7BckvI&7{c1g z-CFQR4dj~#3klm24MtSSjauH#jSbFo7QlU1>EF(K0MWVhCg$K8LA$#3NVq;hL?Q2a z7=kyo96kz>%8L$gyZe&aPzSrhOcAF#P2Mx|?J$i-Vn8$bVKK@6uA7Du8&d!zWX}um zBpZI+U8;u3ab#HaFRt9Y0VsEPGnI z`JvP@5v<@;=k6!SoLb(?mWcsP6FHhxIoN@*A;l&YSZmAaeF1y(UQ}=u)SivOB&L5> z>|E_PRqBOK#nNP4D%&3pdNP#LQ;5g+Xa8r$h3=(4f5tsa+w^* zykGvVq&~h0pH;-(3a-=0P{w{B28OMXyGFkQdd;Ds-~3vKPcV#{BGVp-^E-u@npvt3 zGNbZ7;i|2O1}O_jy5dCs2VuW~j6G1HZXL&869M0lqQT(V>ZvVN11-4v0*NTBKXR%S z&L90xP)g}gL7#!BcQC5mWca`F53D=<${8{bGW_XMqar$FzB!=aE-OYRa?T2Sw~PH6 z3j_lJ9Qt!<5In$R+r;{)A1JFzc&J*FsKhF&$REnZ($4M{NU?8ov4RK2Z=)u9M4*Xb zsD^#Pi0!T914=j?WyVI7z9V4x$;;kAT2V_Au7-JW!(Tn3zqIxh3tA%4op=Qln6}nb znz;kNvoSBup=nb=ImjDWik{JUibv_Uwc#47n;J0>HZZ^q=IR#>)j20-!&VbCg;qJ5 z^5_mwaD9oG0Q)HOPg@&O13B_CQi0VpWKinUhX7g$YT5SY9QtOEIh6FFDw4jrqwz{~*M%%mG*HD`nZYM$^Q|(uVEkku-BGrloW-9Rs z*5a{i0a{N18~bY*C#$Ql01Qp25zen&T_6L*fbb7z-kh9CVf*Q+_@QAG`UV?-GT7Dx zxa>7PX0FoX@=|Joz&-oN%)jZOAjLrnx|Ong2t${(Qa7>#O^ctht&hO1Lj{>D?;k1= zy~2y1`V#7n!wjE(qS?EFc~udGXcP4g91!Q;57Y{t@A566Ddtl2tL`Qx7!wlK#T|Zd zciUxVXO72hH{AJIN0jBAN@+L_Q@2rdgSpfVd9H!^idsU=@fr9=fp@k-eE~8zYFLZ` zy}bE4w3!!`8HwlE+FhPuVg45LU+fMB@QP3U8&z*`^;$%el5-^KJ`}bDLsSxe(c2YV zPX2A!c3mBiX|2w&soNvo(P!AUC-W(7z9-O^#gZy<12x*LfGL86eLbO*vDm)7^}Ua(8iQ1h^B0E?Q))>~Cu-{Di^Fj@{Pml)*eE6H+ae2lp$yg3A2=eAh9~WO7fd z-#VUPK5ymw5`jIF9S#{DC!#BTB@9U!BA7?S$Fv&;wDQ@&wx0OyH%yC z`>fyTVnE85bL8TEs!J`&SfPM1i|%6k9Um86c`he-9zIA4tH7dNK@A2hi8GX^kPIe; z>=dJ`*;(!}bMcwmR74Xx^Zan&L{m^>-nx2llM(y>i8VBYw|CIkIXhHke#o;`4t7JK zd~o|N{c@Zg2nEqJ*dzdvW~0xk{C#Szty z0?%itn3y^NzM$war=+ALMvSX*6#ly9G+d=KiGg|?UsfM1YBHw6c~y<9JYeB>8qG~I zNM`y(LGc!cMIhJ7pCK_~8^m{qEc1l1hGcv|A)?xA4%eehLXqu-S?3TcpRbJNM)rE zgL39-a`b9P;L7tTlcDwLPKi%9`0)L_Y+#VgBoH^i!i^G}Uzfc_GiF6}Ac(sc{XX5R zUN#1CoK_;ix(&F3$Wqb}p020))xyAJAM?VcDb*l9Zg2n){;At1ix{3`iX1{Pad3}T0Q4oy%Movc~O-Z*+NAq&1 zkZ7#TV4cl!CfOXgrVIzBUa}_Lw@z!PY)gr?Qpthsu68nvN9PH@R>RQaC@xG3;Q030 zY@4Tzu8simO2xn3%cccoG$JvCuo+;ow~5h+zF`>RrhUJ~$iwfqnbTGR2E3&6k`ZPd zgEvrVDfZgrS)qC85DydMO6Ds~`GWVNW$+}jFfnyjLJ+ox>LKAGmj-#g;H?&MS9(Jv zQ=p5c>UpJLQbeZS?%|V33ue}cj)Wg9N9FDQEm7+yiSTx@sV+tIn8W`gOT?0m2xU{b zKI<C zhaVZ8Ik|vF&q?ns1S;m_fAbDEIoB)onTaizekvJ7Uz8piE}gl|N1@$L2ukD08N;YV zy5ENqoxu@m4FWbDQd|q9Ss+>4{p~s?Iw&&J`ARl78{-nzh4UsUj;AmQlx*k0^_6?yi8yP3Zp*!WI(C;f6rF9f%VAVQ!lbmxfgAT9a@=tNm3q zi!qW>@4eiheYBj_WUTcY2Xq-s>Fsy>^Dcr`m>4`y;ipB`EeIPIIsDsFDd*yn^@dfn}^093r(*1 zr-<}Lu%&4PcU~{=R}@Dmu~^5U>kJY|u_qWenu^kh_f}p^X~)R~zrij()3KsUH44m@ ztGnY3;XiT7M^Z}#`%d6B74V_a5ae|*)?ncmPo2Q%5qon=&jvc)%cAt%PBdH#9XCg_ zxTd?;<%;^3Tm*X;ZCkK_cditi*$n6H%`)zfO_x(3644ARSKj_eN}$@I(gHF^*}Zc= z5XDhLmg2f}Uy{81U=n*&e!qY8xQgJLffw5bnrGogL^)+;UTOT7qw8t)eAVl4V6K14 zX4g23>L5qbGY-(KH;P0#NkW5iv6pS%^G$vGY^CLau2h#+U*&5Ai%NBx=#CM%TwJ{{ z*NQK>6Nz1%5e?0Sz`iWz!UsXlo{%y&NLa*}KWNN_;$5cokRF+do6=Y;N z^EY4Y)Z~HEZ_(1BGhRkPj8G66moKHrk&P#vt{m?d|EsBDR>TJoN_}#Bn-d~w~CbxR$=RCVozajo+SpX`ml$0*{b*og#n|a@?=u!_58P4!{JubcjqT|HiOkEZaZrRIXqpp$XY}Q*3CFj_T$w%9PaR zTB#~p4gts8%XLyv(vC-rzaJX#o|F!Pdos$C3-``wmD{AEuruJ9Dgk|C)h!}X%#Ee} z*4@Cq9uFRaz)*m|I+*m;Qd|j|xDR``!?wzHH957&^92;9Y}$F$&$E%qN4`arA$*N= zvX=_z6U(y(HOB=`ePZwe0sR$ojA8?tA2qTMJW8FWSx3KSl0IVuMQuH($mU;h%iI@; z`MFJqavAiUlm9q9X@6STqrjnJak7OjP9QKz@pw>!9dsP#X|1yI3<{*#Do(Ks&BG)z zZ;$FSUBQJbEVP%ik_?MlANr{v71qfB0#!ton#+XRrh%Uv*$7A!RHge^7fc_h76xum z1v+10g=z7jDCj(@OK6%7$pFEQBblHD-u#H~+NFuRW&S~MwK2&ElU9+7gh@6=e(r

x z@H)28&?nz#t_v_~e(<`BD2ACL501^6P82(}!p@Kp;DaTXOU0q>@`R;`okRvP(Re4m zM+a3@;h@7CE$Jm9lsp;ZR_$+tF%~~Z37^95a1p(&$tOWqMm>Xv*piTJU$rtPa2~>i z1&TyV=`1?MPXFB_mtPJf7Q_hmnQOR@-HH}z&-5+M+T^%A3x2{8bww$5keE|>HV?b*Q~tfAZI5^ur`0_#5%Ap5*5s~ z^}A-VNYT|)RzF&^I+r~6M5!u_6?H1L2&3+L(a)L)NHr1xn zF1^d)*Dl4hlm2b&Ydks$DmfZxTsQhs5p=A$s*?WSX9)tn=-KLiUQFc9fO0$zPdObg z-CM}k<%NsRI9U>3+$gxAilA1`o@)kARk$f{TUj>7hUjrQq zMF(W@@-<4@NcCTM8#d-42cD+r;PAt{nugc_q7N3crYcf6Vt(1u=FuO}dcFT`tkBh^ zqzbM@=5-408d{1#*kSvF9jhm9Ls)cwqcdQxxlAkz|67Ec&-eFpKY8|43yg4zd?d| zmfuByQQIBGGT#41k>~x}kIZ_*`(M6t62wQ>Rb2PAessB;&rEPI5s%8sy$L)nNzq(-mQk*9RGAJZek#vlxr?a`mOInw@liux zWvG8?do`@YA5WUc>SLgpNO)x-5C89gl$AMo2z*$=d1P!hx@+`)jc#3qd>lu*$0tTiqw`-aG)d~mz{PyTwF7WcXu3`dnpkC?p{NO5s?J3 zGh|>y1PX_XpH`%tEz(@O2;=$c zgTQnskE+%p9yxnW<)tI%oFa~*4uz!wR5nZ^OAlJ=Uioo9@sM z5S#fD5`7T58@VomT;bL6o;@-fg|*A5pI23>28@mamANfM<^|ugM~-P9Q&L z6O4Ug6>4e15a)P6BcoeIZR=3uqi**giKgvO?R7PRg}9iCH8pW!VEpk>3NZr=2^jkb z%P-Db9;T;s0zTF+Icy$>)#DVT>Pk{v)Ym<(%vi}AoTd##uU>{5HtT2NxLy-3rKf4d z1`Uu6@UIKLs;}*yrv*=lXKT1}Lbk$rc-`05WoeUPDyMM1QL z;Tokh3*Cgu@0Cra(umtQg-T97Ejd-!)_-;(VngWmmiyP14U*M|EhS7Ok<2wRx!z?h z$pgkvYuO1(x~eaml(Ibgco&wBb@?`mn3~9*RYmN_=%@RMJ$I1*@3<_LpvTrxoQ)GTBFjW}L=& zlm;X<`Uft)+P}@d9mHYA`S?GdT)QH8vR&bd{Ud>`@yD=Sio`1}3onn%k&HZ4giG^U z(6^g;i#TCW*6VyhSad&QovbTjsQ$V%P~MJ&M6FY1Lzlj|{a*{}v&_gZTpRk~{?Mr0 z%|75_NQ;c|o)e`LyNsuS3%u5*(-E3mMY3k4R^*VAZYO}2^*=OHN-;NEDSj#HI`V|`l;1O&8 zBSW$DKzoP4nsY0K+&_&@A#!la#}w6magN0P0CX?5vGeDTONX!jy@saw?GEjeV1bwVLhr-&m_Se6p%*}Bh1!r$~Qp3J!7{ioGuMhq2_S8eSlEb z-w}DVF~#6OfR+GdqOL0KvyM=c(T5ETR5cHUoKDFbHO-Z~!-Qy14TiRMVJErVUK0L0 ze1mABU)_q-;dl$-QKmf-@s^*Nr?fkwh7W>WX$2Y&2Wr>ui&qqhkF0Nk z@^=#d_`qFBqRkkPU+dnf(}^-MvM01hx)R4zCVM&Vrihg(nKn80f>ga&n3gS*udx86 zpi=sSqwlRCn)3mr7yWAFfw-e@fkpmoGmE^%fRm$-88ai28xq5^sAF8QMf;{)-A`HV zUw5O0K9rSV3}WT6MzK=5Si~o0_tDhQ;2kNkZz0L5Z02Zd5Tp|fjO$Z<9&*$8JP65_i_aI zbaV-5B^H>YXcbQnuG1lFhK89Ixpy<#5VZmJsou$Js{9Ui4N;dTiCywz^Nr=MY+*+^ z(T5*1BIO-qM77zdxc7iE+Uux%VKpw(*H|^btQC^m?W$4U?JF|p+lsf1_pymKIaW#K za$QN%4Zz#5o6HO#vx$~wm+HOcSiz(FbuU3OY~bYhjeO)*0&>eZc?}86t!3VGSAzt=%}b*bmQCEMKsN^6m9N8 z;Yt@dk+WJ%z>Domyaea+Se(h$eEKn;B`Mi!{kQtUV=rMUj1oy)j7;8J9UZ-Y*R21U zYsL_nl%$LrqDljAg~1t?*m zx!!D{#}UNZg3C~&`V=n;rWUeMC7RIyIr9_y3jz#yOVSmm=z*R^xS;amPVZknpFX>c z!m=dvhvvqvB>Sx6__#2spX2e0w@9QObI_0IY&rp}Ss6u{USZWBEefLDv@Lxdu(mVl z@AN{dH``cYUlxVO)%;r|lI#pQH=9%sOBm_t^Wh6onH-*Pt=`!14>Yp=VaWpWHty6| z#*AIEAoUEIlkDXb?x9A2tN3Cp;jpveEG(zH-hbzX#_9*WJi8rJHIr{|b*!b%@He86 zLs21Py+?Uqz~ZFH;EgR`*u$3ytx^PSZat2>11Be20|wwIuL02wbu=t`j4daw=#I#XZB6Azrya0xZy*3R4skIjyxuQ|g>}3}wkREU1xP|f1iA{e z8}l|7Vw*+pAYTO+<5V(A4SqF8k2df}yWEF;p2bcrU09@WM6>pEF+p=`P{A(j-hdW#G(aRfY0mcHQh*~@ z){x}v#&tH>*2g_tiQZ#E59RR4S-RNz_0QLz39S=r!uZP|mpru)HR zz}ng5yaa_fv0D<85IC+ZY(Npd@m5SZ!^L%fiNF!(mBIg-lgrpWM+C7hR zI^rIS5QEK1+~~<-avCP({Y*QZqP%x$mzx-#8q5xUc?f^L@lPt1r43wY8FSfjRx0e8 znaHq`82+6k%+P$Yw51}T(R$7?_&a_ME4wmhwNj)G(0-8~-jjV63oi4lg;3S+=Z5w) zgSvZTD(MlOD2?eys82gr|Ir&xRrb0|IJU=Uc9iJ#2i*dFSPHT)0<0v@?31dm5E}@&o;AT9oK|tEV|}Hi7s@Q)S{;KG`U{l%MBK8iC^FE#2yz0gP!s zgp|3+gmk+RU(b*|e`oGA2?NpJ==HPwXnIE*`VC$pie0*PT2(=Vr(L#sIGC9y<6 z$XTTn8U8UixhAl>K>u{do^scPKQRiaHR_JB3v3?!h88eRk-Uy-)vuZPP=f%%88o(% zr(=(#chwJQMJyv5N&Pc5U*n$QW;vN41iI1dlJ{<#8EI}7XpSX{+_u=bAbf$hOz9~& zZWvN6&~kn~<>5gDSkdup@S;B+afhr)jZT+coopj*X}rFnIH524si7(#oO70_GkhBH z99XVNU*>^EZ{HXND(#>u2%__wPV|4~KOWI7*_@yHXhet|ehld0Ms5_KoZfJ+3CZhT z3_8d-EXQy81tWf%2$L8AsY@if=yM zs005cc2tY0F<}(^aUfYSec!8^$v9 z^DT1(mf6xGuK;v75ryZcMKzXhpA?_CfmIU6qeGd%K>D3zWoQ$ZHHq&;<${=^u=JFT zc`nvG2K3$|5fR%2}F!9|#0blB>sib0A~0R#1NfW$`-n6V(7(0w(-6XqB;J zW%C68kGEn>+wU5#MO!+34j*H&8kgjv)Em)`h5=2 zyh9=|7$Ndt@9>%73gmC}jnpbTq(}b-uoAhua5lexcOltyi=N1P%=|Ej*2_3rv>R7= zj;pi+_%>q>LdK%NBF)>kdp;;s(auWV8v<>XkLby{1$@cS;UzOS{=KfEb59%VA8sUd z^hx1}*r!Q3^%m7$G!!0cGC-I>)^fp?lHTrN&iBi}j^fO=miAKGhxmFyuhJ+YhW~Qq z4Az8^1wz+qDVQ_hth_X!Z#}V|)!J6&JS9Ft@-pLqPTTS@b3jG7w;0oz>rs~k(|)Cc z{C9ucc(JJ(k8M}Yf^?xzJo+EQqwSvw?z)NqMKe!g-X?NOy=f6PHh&x)Z;p zuA`plvWOwiE)Qu%*!OXN^YWeI1tFac4rZX8wRg0&0?cJ(B~y)n9UgO10H=h|M07gB z`j(tjFDt0}V~i&X!w1U!8)|e+AYU<^at8H-5gBu%CRt%fr>Z|4Ze_BOw==&ZS42`K zfBkGi*I{chq&b^tZ_xGz$SdqFMokWcEiNh&g#dDhV@|*2FA9J7@-I^`>Shc+a;q>? z1c3Qtd7$qvai#mVho%Drs23^u8qdZ`kW+O)m>SY%l!oeYDc;+1Vu;oY*#({z_r;b% z;^12tDNeF*jiJ@FD3r|48QBNDyKy`x0h`xIGR@vL@3$jZ3!h2Nsh9*#zZj$z`|h{$YplhzsYk^WdL zR98ITqF+o6ckNZ>-TtpgfH6R-cib}iPNk?BhRj8G1HE$!1|Q!M*`A^{eK0qdx=sOX1cpR`ysY!!!~TJR)!{F$x^#b4siee1rqj+ zxN&8UX#Ig>&~W2B(ehWR>d$P$u*(io1ED}p^=?c{$;P2QxWFZDxb>KF$BaId_i z!Yl+G%meT^Cko7zdMI?VBKQtBd07_AZ*fDkORX9Iuy-~q`n8`bV0HNyGVpA(Q|_pn zDpsUsvFG${6yctEe8_Imk!br8*@}e&^pnfk9=ZLVUiG zJP%g(6~50fbXrf7jlj3s27xWU5T06n6suPcfHH(IF3BN6kIQkJ>VcG!SUlh!onD;e z7Rb#+>Di||PB6#s$zROkEi0ZNX2cP1YL4^40Fn!nK9RfOP`(s5%@gd0?tY!d%OAYh z;yCn&a#?&BJ;90xullKZl$5p*Jr#r5-|P)`)w(d6!+kPRl*e!%F~-jz17%0p7L;t^ z3>*&ScPdqAEFSGh05=dkxg1B#Z2QQ6Z0T=2XOzO2tI2&ClbA4vcrb6)KAgB7P{`4_ zT=v(n(?}ii#iU!-IvK5fKCSK9yO^bdIZzWx2Yvj~)5^5llxTQ0Xmjn;81`L`hs;%& z@3)rzZwOOthJ&cD`XXF2=2%N3j??VbX0YK()TAsMg2xqHbS78JC*^yb$mApixVcBe z%=1qoBxj@^2WD0lHw+5G%Tx1X#!ITX>j|V%q=3);(9$NY^_hn$CT0tJT+0-*i0%>T zKsv33LySkvDJmbZ*4aHo_vcb3P}?Sqa|)5Ar*AkFC^41rGY9rfD}GJ3vq#+?mHI|h zW$J-6th9iD_0$XJ0&ZX3dU#j_YULdsQI(YJVz?5(>Rw{z@owaZ+i~pp%>nN(Qdnv< zOP!z3-#~aQQF`v0D~02cavf!{JAk0weVzlUJo|2m;eH6k^anj7%n`||fHH;`; z-e5}jx5aZu@fxei4q} zk>fF_A-$fa2Or_NpKhCtCW{Gf!oW=lvOb{g1P6+0%QbdY;;lb2`dX)BWw{9;ZuQSx z%3obtRGu^DKC?fMsi$EGDVc$YF3RpDg7^@6S3p^+`ybx$wp*_X!FQj^`CAQ`+bPC! zf(u%JC+KeLg=lm6vj9s0*|6<}*UpPpoG8RO&Ti}C?J@!Cq5_~QOdsD-@vonBTqiu| z!TTVS%OgcZ^0h;p-|80<4yhxLFKb>|Fb;Db*C{$;MBB2>r(aY0spmiV5%_VaA{I!c zVTTiGJu0ARs>_#0#$=Cr=&Esx6P026@?Y$l-;!?lx(lD)%`kE7OVURJh-l3iQ!ktvw6#F~n@`iKmkMhF#UMuvy!OzmuzI z_{w~v!+2FY%l2ufj{(^CPLzQ$RK_u(RWZy08hUBKm3x7Ew*W0^nLI#IV}#&9%E@{T03_Y~=8CR=}lnqTu=X7kkI0)k1CVI7@o~uzUMFz&GrX z;R=RDnw&AWz=HKtOh?31%GezDn_Ekq#&WaKc=3}y;6nj*PTK|-n2bA%#-H$cBQUz& z_&YTnBD+vyY^`E+RAZwT7HI`zt;a=iswKke01JOFTKq4fp~3W?xCO0YrcIce6xo7y z-@!8IK!aL1kfxK4;l3_#zKkVgb>Wm?QUp6K5EtzUgVm$flv-0VBclFTWgwvxvB?vdASzbT4{zbi%Sglt&FmDByTYo8&02zcCu zW~971AsO;Y5M3v#+caymLjGgUSqo72ttYJAAqnajp^>xRC}L~k%QK2c(08nO)=L1H zyP>H>1f(I$cg-kaRRm^vT_X&H31#>{R$*hwaq`J5UHw$K-!keI^Vkd4sc!!MwWL~6; zUwXPKU;J@Ehpr(+p0XI-kM@ZQamLaN?zy%3d6iiwxqpc6_{TJ=NkX7>d_jCkfi>Q| zI9<_w z%F={#ko(b$gitZ>=_2?B=#GFGgchRRbmCFL$kf`(O~`#O70HgheYlx64jqL7fX4@m z!{z1g@+<VIW+)!yY6WVN#En-7>$vcGjZuOBe{Z0^1ZY>P!U-X47@z= zV+#v8>zm66XeeGD^Z4SkDQWJkVInXK)3&3W)>9lH_o0sb(y6>^(lL1uaiUxKKUeYK zCA}(1ZB`_?2uL;s_tQ;Pr*}fxaZ4oT`LrGueL$E;P)69VDdJmdoYU~Il2*P>$4iH{ zLKe9St#D<9tZ0dk-X%4e50~Lrkq2UEZj3Tp{bjdz&~c*k4fQi)e;F)xmPNF*b1E#>RlfsF~D0bh$c8Ds|B0_-(a&AK{S(|)LrJaFLt z@t+Q`D&%MWE=i8H{Bi>F=SQZ)Z(*>q4*SvF`^Qar2KQ^M zkD-lzGPBci`BC@i{lH!uDmJWqno+UE$W^!NqvHs)TA50jksW+?{vtDMrevpkA1i;r zo_F2zTEY>x3>||QPu@V>2pxLb9t!pMqUm-}h>iFa=wl@l{czA?L#*e>0X=2lM~%@2 zBVL;UfFk79Y#vWzB1n~jeU`BC8Ar=k`f7ivBW@LgekPa=Zz%o{agz%Gv`btqjkMu3 zUi^QdlQ2$(?JM9*ahVX+;$|^^ce7Df#L0bOsG{68lIzM{vI*)&E@)3e?qQX%19n&u z^QcJOxYmpVXR7xGaQ$pB-k(%U_`CoyK+eC&MY$K&PuR{s65>a)7vOI^OuE>%lo#+b zP@$Z)Fy-t~h4S|A?GD=u_+P|zVTAkg!lQ;m>#IN};$GJip%1#Np;_GTK+MUzErdo} z&RoHQAGr9;6eD!70`95iz}LBH{0|@uNZ^|>^Y77d+NeDLUPTS@xmT?!jP&wX-o zGL_C7|2On=Ihfh`AiU?xH>P$e&|c+J#_X+4MCQ`JQKYsw)Py_ccwwT5=d}|WNu~`r z@}%CfDBH&)3YCl{FZneI8v;hd+7W44L}WZGT34|T3$S_DeBVfgnr!1-1>LOAXD?t6 zs1|h~Tz0OfF#`@0-$bvseQwY6*AiE<|HsY}AJ14g&i@(DH5QZS9kfKIj);;j#%kTB zvyuwFO22!>ufuRWhsk2+GDIFvFGu(}S@)W%)JO}{YR`qAUrUBAOR7s_=Fv2w+I+gL zv4x{}HqmF5B(GzloM(r!>!r17RAtFT2rk+GFwNCN?Yf#PjAA?0A6YA8YbOUjf*C2* z{QEEtZtfZ=5ps9&yqH2p(g)4dzISMH_Jtm24D__P5`wqa;{dK(2raIQ0S2N-!vSw5 z;cM7i0w@?Pcc+Avv6-aV;dE%#N0Q^ece8irZ+~T8Ai#Q49=xo4&$Q|8bh_&Yeu9)9 z_EZ86v9chVzhSZGw43$$0CM?*5B<+75mz(PzB4OfcCkL-TYy+fJWkRrjXTQSR$zV< za8hqG)XlewFIzxoy=tBhIB>E?-{Mz7ya8=G1dSmAW|b>Q<0=PF&V4kt^j9CpTu6k* zbNZ|){OW%k23b{M-aQvimJah-Cz}=ruT|$e<{s?MpLMJ$ZK*YQ%q?@XO!8$wKuU z$(Kxq7;SWA{Bm?{Lo2hDksT^bj!vwq1u~;wF7KBS9d={(+qkfk4T0cfjv@Pk;#Pl8 z^m1!#rKc|jmt2ViovUgNM^)9juGPZo?d%0IVB&sdYpe${lRvl01i?nPb+UN9I0Cpl z76o;kpi4+lm!n&f<~zj4YO|H=yfs_PH&b%5&Vt*u>G`$0S`E!i!t` zrHkBIX-n)!sdkro~e0CtfRd zCu+HpT0c*!1##!fekOo&Gx??+wi3TY<)^biJmCp+bBX(QpJC$S&%N_JErjq7WX%}OfQto0 zETte=lq_-eN?;81Tbad|44f3by&yzj(DRFs0zqZ`6hhbs1 zaw;;1cV?CLYbR&$HGDXX%>{ow+1yNs9h-4b$1mOb=(EQixsEI84sT(1l_qpnm_n}T z3z13hB#B|cBVx1b$`ZdR-d?V^_nTP4^gX(UDrN8Y` z6KPd-0P~Lm?|~INUBXQTH+@p3?yL`Azfw_YB<+N!x#k(q#{i)tf@y)2^Y6`eGU?}a zt7dmFJ99(L48WKju?z4|V&#<)-tdxR1Ilyazt#IshObWOF7=583Ztf4xE-)F6XncQ zsfVGyfT37uCT01?^><66<;~r)(-y{qdBgPKDbJLMDjG*$E)P%!;LpubK@)vrL!XJ+7kExD013m>W8pJ7@tSN9*M5U2SIyK2=+y{4CZV{Ul9KRPk!2h zQtzlE?v(#sEQvq71JN7D3`Bdix~pWRP&*GQwJ;=T1KNdTK_-nw%1Np!=v%*Uc9u$d zS83@K9Wo(OUP|~`FKjAEB!x<5#(j>6GbO2V>cKsBu{&F_JMj&sbh)f6d_63h{#JNe z#ZILK@b?sbRW{WQXoiudZp*`QD(_+qnKscS7rI3BoRqI%_WO$>*Z}5RQF`>4zNqVV zA#inyvLY1_zWe0vujS`-i$e@1@@yqyX1r58R`I**QhW0P3?J!=&1g>v2N#NH89Jde;J@FK3WBqeZQu;!@>P+o23m#50p?sgmhALK^`RPFQEfFpEm zjzs2mrArUch`u?KSE*9)tH#4ae7>!0 zEkm8My&ZX(Hcz*0r&|-ALBZo0uFFXUYQHa?h)L9UnEnoCsxCd=4L z1bVNj_3#i_f}9`8BnbMX%S7?;xczJteW-sKZ-7=gw&j`cSh!OW7D@_LS-u2tN9o(> zE!v+>>gITah_7;C>oel%pEp`m>b(?i-lZ#dp_QuH_9z9UirWj1Y^L;28z{0KA)zgc zPDc9*&XJHGiIhes2X9AKKmMCLu?T^~tNRGL&k|le8`*f&II02lcXsXmyQ3l~9FbY8XQnETjyZ!B(24~Tdl0sgm`)Oo5E z6i1CN0|&cvd`>P(nQq`u3Xsx@)*z@F6cve{G|4at`kZ{c(y|*Qco^Zf8G* z!^zuLH$%S{Rx#DM-#tth*SL172ZFwK<*K^{E~DaQTxa+1hPgK>hP;Bm{9F&f2l_70 zOWc$GWh~-V0&g&ej(ALZDbSDU6$LM~NLESGWvd}}YbFubvylS&r24W)qK=wEO7t&q z_S{B4N=F~+F~L1HhjMGK@SyxE@%kbYGIRt5Mi#p9m}gZ|zOK@kdL zPnJjZQ+NCK7@fKB$I+$xB$rwXB5nr+OdXeKu=WoC>P0#mgMN}+C(A1CfK#Rg1P~B2 z%X#R#Qk!d$&o|zN*U8%;?CiPmgNG+C4BQaba~$faoXE5!r@}k`N(1-vs#Nx#$$LY9 z*y*&3D2dO4$Yg|53+I1%g#n8OVd-N%G(9YscExl0Qc(_3%@Yr(FlyBgC}tC%&o;na z>7;L2pLf#>?I$)igAw;%GEp3~a=A=J8a(5jKy4_%_M#BUmp$xAfSSrtf zj~DahQqZ!{bGYU_aK5%b<^G;u&1Lle)!%UY1~@Lz#rvSt{$@w4OhLQ6&+JixfdtVI z)ol^aOB0RV{5Qlv;9TmxIq#P{?WmGW8Eqt+1M+B}e9BaVV_}*W3 z)C?U_EZxZ;EsyM}-l`Meb!!irmnJ|ToTZP9qhpI9aG2XdvWrx`YLqEXFxWUAKx2*) z^?Ch5<&< zXW$z z89+|y7f|WG^|M4PB9;j{Vf`$YBN(PqRvvt=cMER=ipclO(+*hLracJ5Zc6$$+HXva z#{pZb=8qYzpG<&4iEgv&U-iL}WI=H^X0Lu(5Q*N|^Znqm?ASPv>gJg0_jMA&f+~g# z`aPy6c}sXaaH;w$mIR>tXrU`i$X`fqb|POh;T5-wn!GO`u;dFAg=LX)uBYlX%@!u3 zo0|57PqC-`Ilhf>40zmA7(NK%J#JVz0I7fXI##Lf|6#)*+#HNmJjxx=h>TWS>JC+_ zJtlgi*?BY(zSFVPa^Wmn=$OSOG%`g4hP+c_%>t~=#41=-c& z(}~wu?=}l+XqIcO3b=C=IICIc@IXwGYu9#5uUPLnHbh|r8JyLH3q1CokYH5D52!4o zJ)OZDY9#&eAy|1aH##<&@tEkW-Y*EBlIqcqo~WQ~aI&^nLiM&gd?+Z%a;|}YjKtr6 zpl0((paownhoH3@uci}2jup)rPB}S6K3+vaD=dldEPmo_1DtJS`NdBK54m5l zpFk7&g5ROsvG@yR4tG-X>}d4|SN^6$BHtuxcmIoV!g73dF4VjgF|0p*B;`i<$f{$r zOtrFIw5OvfkqxifFenl$gyIykXoHij9v){jKGmmaN!KGQV*$M7GD)QDm15=)oiuS_ znu)(F@N4?$up?!{-HWaSozo?w>H(Q37L#;A7q@lv!#Fm>HW2r(P7g*GHjJ&n+yMfU zz`_0ub(1Fa_Kv>fm{(|Kx&!g(`LeurgGk~;sjl#KyCNh7BH#>G`I#n}7;=U(q+B{~ zv#?dNK=!3Bu1EQNVXwJWw;r~JpjxU9-M@UqJO`e|*Eo1QM^^pBN5sHMEX1tQ8rGYW z&?><$WEFm_o%2fhvy8W9T0OypZ~jM%wl`fy;7WC0uO>cBztqqr{PwDVO{iFVJ{a{* z9Oe9N>8w)=0QdXu>086h&0izM;J@xrsga(%mVe%r9`!#$;Kd^a^d7cK$$`FA=hauk zA!cENDoNYH=)kXCZA7=x!gM11Lem{{()n z{$tdu4ZsaRM7QPabtgj`DC2LAIZf)OBH(NAM7D_^3aVv=+apAa;B6CbWZmV$@A9kO zkn@x42GV5ulo6cCJq0=A>E2Vutaf;mmCtEjkQN-#L)qfK3cPa7 zpM<^3OJsVX=g}Ksq|Me+8H3d+!K7Z z=EJ(L>q^sMB!&9kGr7?He`&B?i&x#ieShL$2B_L>nrY5@%BcCa_WGNz!v(n_mV0Gi z7Gm2Q`X_j(!xl)Z_q(I|@|muQOt@zB!Ru1+0-qvzjOk1d575TUS%tniew-C(FYRW? zK$4}o7$v*+BHHJkY=7?0Qxj7lI2f_er#Ihf_yF_trN_uNgE+b2Is8mf}-2#-pO z=}V_Q5dws#bFP7!opR%&jgob0(y6k|ltq2fFnZdV#OOrJ+7vMn*OtGdmkL?Q7k z4^f*C-T`f1)6}wr#0rHP5(lnsxBv~48m;t6aw4omKEE+?ynZb|_|XTTHn2B(rB$de znO_uP-iBuxG>BvcuyCzKmYU^qQvX?IIuy#Kp(?HzW!3~il)(q{v0b*}#t_ASVunKm$M)F;s-a<1uR?G-! zEN&-YuG^pbI1f79DZ#IrWH+3CFRlzv#PCYL+?O^VI0{?X{svMQSb**IsRyFtm9QEaa3*BsR0sd*7Eu7!`;eVgs^_Uqg2llt%;W$l7E`{ z`yo?L`giu;+zBD zDlu?P^P4#bW<6|yS?AIhS7Za)adeVj{}ENCRuwX@>Jx~Vf6x!n{4R)`eDbx2dVt@% zg==yYAy)@>H%#NLw;Lwqu@6!AM;mwI0y$FF&l(>C7~*`0A9EDg7j|;BpxETrS=gF8 zm&o!U?`3xqP{pj#vj;Y;x6;j5@C^2J?#J8pA#8T*BzXMwCsw--SCEqy?3_(O;^Uxz zaSanfiu{$2r1lkWThS2S5}X#I>h-&7$%LDqEGcEvXRx8M4e$MwCDqy`VqYfFoC6H5 z`m)^6d88{h_JKC2t3eN_CGTr(RTdG=8dpV~j2Zli3is@ODDOzuQMM!E17T8}erXgA zF~j?R!WOfQD->VAXM3%_s5!wq#*BFN-ry+t!QC*2(h?sMGDG60g%RCZN5^)LZ9Rln z&*bKE`a@f9$qfC2RVsqYKWBxW9izIM-^#_nwaf1~35q4UOb?u@PIW(~j4+|NNk+9| z-QwjBrLnrx|9&%?2?aqq1kXz|)dm^IA&`XK=;L4m<9iD44Aa(I(agzlAJlg}=de&1 z{%c_6%9g6XAfvM0J_cX?s16`+$v%$Kd4#PUrg>y3b_W-}OoCaKbOnlm@bS7ZH`9Qe zzW`$rKaKRdTHjycsyT>BcO-vZ%s>#4?N}cL1yKb%F+136u?xkvD&+O0X>7Hb$Kz5% zu`wp8=VV4hg&d_k&(Z!7H>vBN1fy_5VC1F6Jx2?)HQaZk5iQ^D{u^`P1Zavr5bA|u zh-!yKGBL)+B_b7SQbV;>Z?1uj9oJLj+1ejNM!`@MhMJpPM0a!Qg+?j`xh=mQ-;R~6 zKxO3xKzUFFvF3J->wjA-iml(-*x=3Ql zz(cZ*mO~CJma6KF?9+rdmbivQLdcy&4!01}4B=?;sbYKrgr%y)ef#lTIYK=lOI$ii zHL(e0)p|~G-!y_1oYZRD28SX*p$8qqGP>XHdZxLng>iLm^azzK`PwIp|8590?j{DV z|A|1Aw2y7TZ0oxZQc;#(eEPP%v)J_L;|+~hLwl9-vr#3sNT2;FsB@R%i37OXbsP)1=eUkwk`a)s{t_A)E>ono8uh*!m>RN0GcIR)Ek*K zUmnERaoqbv_7kD%Nz+4K+o?*)p4SO}8^@fa!Qt4?+o6E+B2b%aEaic}Pt9RrnbILt zn2hbkA>kJi;>sXt84j@oJ*MB&m^+>6z8_m#&Rewox0Z2N{*$Hubqj{6gkX>kpHxgh zGKJ9VkFtR^21uE&(!l3O+YVxjU;4i?)Kts4^lmf1g8mR~4`P!QF11WIWh+2O@pjLF zhlHKfwdR7b9P=$b#!qpM=<(vj<>04fXyr2?`To%-9xw3q^XSSNV?8LqXUYp*y?AG= z1n!&zJ7{xhr1W57LZoAD+1{#vov}aMND~BKA7@}6bEE4^BekXxabM#o0!^PyC9h7@ z1m8C!5}$MvaV2BTVxzym?IUnFJ)}b-q|ycI3FKPv)N!4@_!iSxa5>jjj(^*~sl4>U zHq!Bk(!!XKrxH~yYbE$+RYY$#jHiJh=W2*ulrW0aAzhk?Va)O$4u#Ax`+0B$(n^tp zH|oP{yRLO5{z_sPY6&^I8*hBqMT{p9Yb21?seaE(Zf;3HBLe)yxXJwmI0|JCDUgx~ zrm7%x1b3N!^H`i0Y|eAq_XptSbP8xK%Lq2ksjErr?Wz$E>dolMuM}yviR2JfELe;H zMbE(P`x1V_pl++DTr%zdNn1+Pc}7-@}%Ekz+56< zQmrH1i;FmgM0mvfH;O?s_W5#;XfNrWK45c2$r4OsR+kUxWrTG4U=VLs$?j@~bp9fD zkqB%%(3T*>)U|&IoCqD#PwmZo2QX8!4XNEAIq*Lm+e-D1Lz`O<2b7q4m!2kRUDY!7 z_B}Y#$p}GX!-%Q0*d*PH`_;v%^Tk`nzsX+F2L8f+GsO^|7h(fv7aydEgfeoN*5bNrbf?MUsts=9;?XOGfe{*;O~P`+p-itPGn zdR;=ZtqyvLY?Y}aymSW3)!F0(*&(wNho*~jh4Hqc#y2zig8{I~lsH)~ggjt0zQ(q= zBl4kZ6x6*$<-#-rND*m(9Sxuf^D!+ZZNmS4F;jWjUY!HW7-km$sQeS<^Iga729r7n z8+li{RlGov&Dd=uYw}KyF3x4;MWg#jX*7oH!9}OG`fS~dIe(!p0FTP6|NUmjZDZFQ zp&K`Doqq%^IED3Im9=!yXO?M$RmUMizbPIz*f$B>)i1iW5o~b=av6(kVTxXJ*7~nL z`(Afa&GVF$Og}^;3k-t4bgSr0i4-wh&k$wBR zn!BfEeUI`I^aJ$>bGSF&E4gk8ciL5gKW=k% zE=HTuaN8Z`eFn-C#`$bYMF=VaZF2Sh3n|ROEGu+^VHO3Cr3(Tl-M7RmnAe7eV4;NkcVGv&@!v^cp zg?=e%7+?EsG1XieP*;S9HD?61^4PQD->IuPB-~!UD@5+qne0avbvb;)c_Ovh6R0Be z(-{70TG!wKx6NT!($%wB>96g(W6EFgsytep!(Z4P*c0H*)Wb#gsy}1N;GKDNV9RdN zc`E5)wVoD@`%>=wKWDZAdhXq$OfM#qz%B$4kiBj$KV8p*i1jy~JeZ~XZ!c<`VjH%A z!KHj*wSW5VQl3lBE$<)EpJR{{i@UE|ED&>7xW^2 z;|BYRH$GeL_P#cv#te0j>i;r^`e zu*;jkMC8=aCbe^h7&Gcoh5+A8@ibR+v0VhLyl^0tkTCDPn!olbadn1L^KN)hlEbez ztgS|qw^^iDCPH6r&ul+Jw?ali%S?F?l)YA&O*5AC0+j;x_dMK&lgPi(PDo$H)x~Z^ zO6E=K?D4I4Wb2krsoR4W^Znfv@`reBC9e?L!96ICpC@v}xLfa6wXJbwGYr*dkKgws z?Y1-Zf7`JSmd63b{f8*K;&U-dmQybBshxPc(Y-F3m}--KO-9!=4{!~-@2-gt*BHG& zHwk(}2+m&sd$4F5C@I`$GGprpnSjIRBMpUSR_pAl*Ofv8EM>x){1Gj`9aI{Y{km&R zvldW;9{|e@uFRxAnpjg}d$1f+%Y{D-?O5(Bk?d4hbvC}`?nkB?GLkY&;EazUX44)X z)362A?G_t62#uHUTk=fKZFa}LUaU;05G2DR3SIM`IkCAO}Q$ND%4z$uU6(R~`Z=QY$2b+>sg!hXd0~&?M zxAdA*jijq0W?$`^IHqQs4MBz4YlkVL&4E#lsl)BJb+mp;;#lC&oA*BCHYhv;>Y=i# zVg0us0W<+h@=T7c=lR+M^$WA3}^lpPLUPW_I%e@~s%y|6Va8R4{@51uR#1i$wh{Q8>M_Ax!2%?X}AoYwX%Z{!h8WKlD5mIm$|=g0Nj_@TYR)67lZv_c>W zS!^?@F;xfUpe6-Mz~RO_Lei0B=)aADmVxLDc%&}f&ov}2sbJ)8eNE}hY52NX^LU|C zBR^GX7bJSxhC|Py1wN05NMgQ|B~)vb_Ks#9STx1x^paI}W7|q_%_v&H9o)F7wUYFB z%?(vjDLoJ%9Gj062jc^_ea}&)?Luxrcu5Obm1n1FtklTt4RiAP-PlhV9L=>{fUYAi zT9Jlbh||k+zryxs3gIrd*#NU`LBY}RtADiAN1Cfd=FWF$OJ}Q=06ia%5ZNOX5P2Ab z82n;EY0^p=z@H+>PQt?-g07bAgb`DCS=%-Q>vo=ep9$C1=#>)PU(r3v0HVCPuB@B$ zHIRvDTY`LhX}g#s(}KP}%T^V`N3@^6N(_x~6g}(2BP8x{yMkWV@?2>qIs!|`=XHM- znd`9`lQ)lIj{Y`3{JwyG`hxedD-e9VTwycLcAAQ?j5zveHD0mkGAd@2(fKg4w7WrJ6qMU`MC$iFRCiVro(&QAJ+9Yo1zfD5~SIfs+51K&o$b zxxpyh=W^W?U_)cA$Q_LmTa<9XJM8U<#R*1yN%lubqtrev zs4;VEtQA)9t5o?lGRO;!9~MHPQ{o2Jl$;ILK`gsM1nF~+_NjpPLu6vmKlIb_l8@Od z1pJtm0*6>{r2A9j4nJi6(mmwM_&mBVoCPQkJ%x5cFsj~z&!qv^1~4Mwvr+ol+v~#X zBlHzJk(*_qV2C^Xe~b){rol%g1X=WR;KEeg{V94}rm2LS7qa$HZeyfv(&1ow)8kKS zv7u7djy(~xQLBokheHJWb@8^#LC(r&#*6#MBdap{1>vrZ*S!gP^j#{(ur}%N0&~Wi zOo+?l7&JsktR3}m#E;vO7Z*m7Q><)MkH>iH-j9Z7vw((*0PL zw3-*>VE8IkQo#mrs53YH?n0 zo&9okGD!TrO$dB0L$Iw*lhcLaCOG-@BZ?Hrmgh9!3)Do|{RM^z)B+r^FIW4OUW;fH=Wjk0+{^#IEX3BQAE9lZ$`TG zI&j`l=USPA7c%Bay3D7zDO%ruiWN!37%syK{96V#)+O z<_`RWZ)_rE+H#&z{XkccDdfnPUHQu}SHLIxe|^tTGr6?K^tMCC68jABjmMtIT+lQrb{B1? z*nHk|%X{Sp+y-J={eK0|rcfN^=pE;hO3}BM<`AijELg|RU8u#5?zskV8al8%Xast# zBL4i7v>52C>%6;4S9^0IKb^iQ1^>!%b}Zgu3QRfh=`^ zt;7WRa3`)JNb5_^a(Ju#OSe4RDig>?`n4?e&GreO4K7QKJqKcimn>9f-9rGVx-$Xf zAc6sYPQ4^z(WpF+VtTA92RCsT#o>@|aQ^+K4Gq*toBmhtMs4X_F$rd@upyg2)nWh_ zP6*rxV-lr9lE#I?W#L=tF+y%pb^yU)-?&Kq@K%3;kKE_zmLor_lnbwt_h`RAzioWO zo>k|XRC(R~GpUV6>;dQD$q?NU&nqaDLg(-9XtWo65CsErFV@v3wV29veU^QI>fXu< zoc{P|H?wY`CT3UO%M5B0jP;g-IhAJjvnVUL ztRr2YV3o@n81Tg*+~@V!Tw{TW;R>HFp=;{Fbq-KiSe>3jQbFS=<#T{*tHKjRX&{xQD|Na`*Vg`*O*l`u6M2{#$cCjR_iOssk1%ceb3ukIuPQCEFQpQr0ALTkc8Ul z5b3lUrooeMD$k%;Q0-MXlllX2^DtB!@q^5JDm?h22xd-tB5O z&o=0v+E{&n``m}2TC+-XI-1;~!?<)wvOS(1RbD4*grpjAIt0^WY=m z*wq@RjD2FD{zc&1bwy9r7LFd6-)+#~oyS9%%28y7_SIg{!sN55n!t#Y?hRoP~ge0RSjV5=qh@Om1sJ zEM)EFAsD{Xc&LlrTWD@;Y;|Z(j$#vFvJ)9-q@EQGB?@& zJ{jfBy`35aQF$j~XnQa62``Dm91S|EWK(vaM4LI&&|>tG;{~$NG^j$?F7%{iIo%n^ z>onD4B7arJr%T3~LQ+EHPBcY0{^-kg&>_~KfAgHH1tBb|g5|uA^LDcZkTJ;;#&Q`W z35!N)gh#MgD+ypCp88t9T>A`cUN2!XnJ45;e(>MO9Ow>DTIGHanunp%Ll(x_qYv^f zYxOK;kyP@A?s>5L5r9Z^N5l6}xH@vf{4URIK%m3I`U4+SI4f9ZQgt>twy*NV+l6GW z%+1*%9#6`FT|zImO`H`T2)i1l1qEb4!0h_dYgA>z;y06iIu{95g!Zn;y87vAvbZL) z(8fZ&b^97@pCM^=dQVP-({EVTcOysU({wl=VV zcOsS;0riYi{4uABEG4T7-IxZUF2J|u{8W%P7g8^Mi5qwOw6PHVoW@Svx_fdzDrEO~ zI)@5MRIv*y=8t2B%q3^co<1QxgmY9SExZ;ty|}yOi%W_UWskE08+Bhh(M&IWmBkt_ z7Ky*alBM0|sEMO^r8UE(H!BJC6r_rypaYL7!{IN_XNWLosNLJ`9ft_YWE8Yw-q|$A z5}lc>WQ*~eK4rl>3I>U{Az<_OGNSM*FJN>&iLl>zC)HWUxT#O=A}jr|7kD_o3t}dP zFc||@c&EM|9B*CYI}gf;>H>M-UHA(GtiD2|CWMcA2P!^AaW!De>)=Y(mOxzB8djAU zfma-s9pE_d=Yn?>`uVqsI$MG_^iRd`=I z8Z^fMB`vysh)c?p%!`%IPM{xscKY}?@^;>oSIE#`px_rs%nJGm!hyKG;=W_NmcFSU zJWqXD&qoCYKx7WL&96Y*jR)gUjz(Ay%9dhqST~TvV|;p)R&4LBeTTt~k)V&u_M>_v zxQ4gM#vI;M@^q8nAY3*9hTG8aY%T6UG;swOn}=fqnN?MAwZt*+H?^6*LRt8Sa{pNy zLSt~8=VjHnX=XQN(vTyYEqe~X(t~W=Yk!P9wt=Oaf^tM|f{@_OO)R){1Y4S|Upz^) zA_&{j0#j|=>{D7pPSft1gKx1G4Gewhd|pyxOU(7yHbfDqZNp<0BA!=&47oItphY2DsL86{9fJ+r)OyR>_FNsp@5j6`Dw$v5XmL9C1_!B+I{>xzO8t zh8TGyyHWxXoNO5|+G-^nYi^${UqO?Yb}3~0oy8WUWdOc5+wTGI9zFMAYo7t*+XM%p zDQvUJpHdzgV0W*e#;LGm?YA&CUT&)Mw2EE7pZD9=c2;+7?_0;W{3^cUlsd`8&oH{+ z8F8_pqt9s=(jXiZKSTMGMpfVuQ9t$op7=N4X)90NiutwFidEEen?Y&Ad=3Y`^S`CD?g)YzRtxC!vKGLndmZAhStT3wIuZo^@uml~!d{R1U9 zwuH5D-pc#{#LruSO2yB=8lz1hiP$x6v^(numX7hb0;K|vJ%;Dd(~L20$6&~d*ibWIaaK4l##(UDOF|k zD@U`gQxffIOQ@6U{|SwRv2}M0%pGAu!UivlXF%U;vPmxZ!{3);<0BW;+FrcH-^q`d zotO{YF!5;y7(0)sul8&2jOw}Bm=~=XIj=aBLFo2p$p;K;ahgin4Wg3rt8$4r0Yj#Q zgWY)(AV*(Tfp=J(lxQHfU=PaghmoF_;3%M8@vUp$Gr2eN?U;7ZKC zJIq$pz>u(Xf3Du-FxK;S79HLM{X zj6iA&1as4|-lUj%yrSO0@=_H<|C$QwxU>hp@fap8nn#EQ-A=$}!ssHmylNAf(3aNQ{$3<<7mq@mdsKA4}bML>os;^Bh~&V_Jh@>0g3n-#?LUgB&W0hJOs=bGt@9hYAD^tDRwjtV(U%CD*0ys>|92IZAAB`nNG!0 zE_ZqZ4B`yvXQT%)hlOM@AW@NgQx`0vyFPmRF0>D6sPN!~G}KzUpsTzMfoX0pbcusF zsG3Kow8DGhmM90bZoy%t#4rb6;Ldr`C@Cy6CqiBT_t0Fsg5x9fSxhEe^bdbji;t+D ziS?=Aer+rYkUqAHUwor?YGGJfP#0~tPu(W7oe$bU7=o;y=^rdC2XVoax*J>he+E|e|VZr~tE>did z+fw6Mz!K>Js{;6AWmuKCB;Lsjb+4x7Ah(VlB>|Xbl>DVA3|XVxq3qudo_2QW^~42pfs z75{Qs;QE$w0<$euKm3{+uLp@)cXMq%Q#~lR1>o~m&dpDCS{NGy0}wEpfK`-}qo!E6 zF!fmZ#RTW%>vc}wx7@13q}VkuN!(IPe~p+!zd40Q)XyiJk6EfV4Gy~{9ZyQBnCyq7 zy(AD*B5t8mKx)L9a)uj_t z+j^3BvHI3AwHz~q@o1F$qxz;mZuT;L0mnI=3z2L87SxpdTCXr+uWntP6IJ8I0C^bk z$LRRLTLDrw8IqEphJBW43Wph$FM7NICXph=Yri#`fNu=@rx_-(!ed3etf?S|?a;PV zR0I$016h zV>0e`qn7_W$@v=Q0?*p9MZNrQ4Wlpa&MB&-lbn(`6yh@g_`;@Mo;bULvNa!Ahr4gl z7>v0)_xx&^q942+z11it)CI44;kV!4R1=y-)sC8vl{{doKI6F{2etnOCjG#Utgax% zu0sxh>J_m-I6kn~WMw*jaUHqvpc{c=!R;Y+=Q>~hA$$%u(TZ-_Xy?+LURs~Uakloj zg&TB(1eF+g2Jdxmj@SpN(WVHPgdN-}Ed7Ctcw@ce;%vo1k0u_OivI+~s~@VQXDesD z`@m8uk+52cRS!49Pm7`+Smjg=l$)f3TS*%&kuTEqb~^gt3R8nvF2N($gt5{ph9j;} z;T}74ulS~^B>i4toBoJ!?Wz=MB@xE5PZ{KuevOWCu{|{oY@zvhVc&{%R1qZV>ol)| zjfd?Ap<)>&F%h61$2D!4C0YN+GvGZke6qJXqeu-9Kil;$E*;rsp@)7igO^Dq{D*t zYE`V9+DD01{h53kv=C4O=|DL?GfJK@qiv88=Cl*DdSmP`IEN(cO&RjG>8}TIdmts^ z*Ej$GH^Tnv1Zmce@F)EP(|A1R#@*gbHb-nDO(rfsa_}yhd&=0mQ;3);zE4JvD#P5v zO7_!OJ9cSjraLF%>-{CApNet5`Yee%*(AEV|KS$0d!dw2GeC6Bk}*koX-J(T#d$6u zItMt(Jq%LJH@ev2@_>h-pV%c1bd%g{mMZ3lW;X=E;E=g{iJ-foU?`&kDf0Df9l9-q zBKw0|1FwbQeVYA)B;0Sy%foV#px-A|uJh~^ZBTbhW;Voqbhevf0z)4BZ-29Q?JU)Jj@_6DY>0wb zS9x3(IZREtHsXI@xFpP0kxf6MsJ^@28?ZOW0lF4@QHLhkZ8h-x*?Rye!N7(sSQo#P z(k%e_PT$DdYAvFX{jf8ZUuMBkrr~xUvxHuR_(+NfN0xeQ7}LfqBm8)_QF-5GNUv4NwdiyknH0)iOz(4C>F$wu;dOJLLn| z0^|7^kIOq9i0*VCm6V~%HIN=g+rSRu96yA{+8;6OP(+gk!AJ`c%b{ZuP!us;^tvw9 zhC_mk?-Sf4P*na*yD;d@VyB`6VIq$Zq6|*9EZGw%d<@D+6QwpGnTk~9Yv@9~%?y~M zWpMX?lQzL<#d|93o8kaXExNXEZc4X7Xm}ewM}P*Ca3l|fTR-4kkY+bS{*Ihf<@2qr{K?!&b4KEtjNc3$=}Hj_91+P$RyCy!|6#j{b8gDzx}?aI z-P`IXfwg*ILEiHcJZD)G%B1tf7sk@eR`PMEsUjsdPwCL?C&EhGjpzf)BTI7vK^MrJ zEi>DLRB;2>&3V93om0x=LDzGIcJ0qEyE-2?Y-%s;VKl2P-1U*XV+x7C0cZ0#ePSs+ zWwatUsxUzi29J_tX(uH4>=YjmP!|Zl;h(m{L&l3F;ga`x3TP_37OOKOMie7Ip}Uji z#$U}^6l=~JqcJ=-ld2+xxKGZCdo%PIrA8V9Rsd$FK_D?HG!j8Gfsb}0>Rd!fFVf*l zj+G=53A;^WRDtN~-z^120WlCBV^_a`3{S-6Bw(;uEph-J5kyfSJV)pKDFU-`@@YOo zi`*xg!G4$??Q?)`7=I}87%g~(Cpk4F>N@qk7{aX)kEX^ZoF6>ZL6T@@Qyc59*!_`= zOGxD6zF@4bmLSv2n*BWqlp&a(fYnU|{-5m*0jU61|6LG9X$!7=(<#e}bb+LLq98;f z)!8m+rGZLNWlt3!xkdxXZUjHeZp?a~MYsruA6x@y%aS*kdnvJPCA^8!iL>+Prf>bqFQC=i7Q#kYuSCCw+~A7x0I-Sg!~4e3BnPWgeR0 x7Ghcn+lr&wXo?PLFc`WKXma?a`$In{A^Kzr&(!`th%<*ENn59k>m%K6d;ng?Jh=b> From d2802b11dac36bc2df199236d20fd05af686beef Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 5 Dec 2023 13:26:29 -0500 Subject: [PATCH 57/76] Addressing the feedback from Nov 7 and Nov 20 - part 2 --- .../cram/compression/rans/Constants.java | 7 +- .../cram/compression/rans/RANSEncode.java | 22 ++++++- .../rans/rans4x8/RANS4x8Decode.java | 7 +- .../rans/rans4x8/RANS4x8Encode.java | 64 ++++--------------- .../rans/ransnx16/RANSNx16Decode.java | 7 +- .../rans/ransnx16/RANSNx16Encode.java | 52 +++------------ .../cram/compression/rans/RansTest.java | 4 +- 7 files changed, 58 insertions(+), 105 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java index 2d35c60635..3ae46a76a1 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java @@ -3,7 +3,12 @@ final public class Constants { public static final int TOTAL_FREQ_SHIFT = 12; public static final int TOTAL_FREQ = (1 << TOTAL_FREQ_SHIFT); // 4096 + public static final int NUMBER_OF_SYMBOLS = 256; public static final int RANS_4x8_LOWER_BOUND = 1 << 23; + public static final int RANS_4x8_NUM_INTERLEAVED_STREAMS = 4; + public static final int RANS_4x8_ORDER_BYTE_LENGTH = 1; + public static final int RANS_4x8_COMPRESSED_BYTE_LENGTH = 4; + public static final int RANS_4x8_RAW_BYTE_LENGTH = 4; + public static final int RANS_4x8_PREFIX_BYTE_LENGTH = RANS_4x8_ORDER_BYTE_LENGTH + RANS_4x8_COMPRESSED_BYTE_LENGTH + RANS_4x8_RAW_BYTE_LENGTH; public static final int RANS_Nx16_LOWER_BOUND = 1 << 15; - public static final int NUMBER_OF_SYMBOLS = 256; } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index 6f21539500..d6763ed6f6 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -47,6 +47,26 @@ protected ByteBuffer allocateOutputBuffer(final int inSize) { return outputBuffer; } - //TODO: add buildSymbols0 and buildSymbols1 + protected void buildSymsOrder0(final int[] frequencies) { + updateEncodingSymbols(frequencies, getEncodingSymbols()[0]); + } + + protected void buildSymsOrder1(final int[][] frequencies) { + final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + updateEncodingSymbols(frequencies[i], encodingSymbols[i]); + } + } + + private void updateEncodingSymbols(int[] frequencies, RANSEncodingSymbol[] encodingSymbols) { + int cumulativeFreq = 0; + for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { + if (frequencies[symbol] != 0) { + //For each symbol, set start = cumulative frequency and freq = frequencies[symbol] + encodingSymbols[symbol].set(cumulativeFreq, frequencies[symbol], Constants.TOTAL_FREQ_SHIFT); + cumulativeFreq += frequencies[symbol]; + } + } + } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 54d19a5e17..5f93ae58c4 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -22,12 +22,12 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { return EMPTY_BUFFER; } - // first byte of compressed stream gives order - final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get()); - // For RANS decoding, the bytes are read in little endian from the input stream inBuffer.order(ByteOrder.LITTLE_ENDIAN); + // first byte of compressed stream gives order + final RANSParams.ORDER order = RANSParams.ORDER.fromInt(inBuffer.get()); + // compressed bytes length final int inSize = inBuffer.getInt(); if (inSize != inBuffer.remaining() - RAW_BYTE_LENGTH) { @@ -132,7 +132,6 @@ private void uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuffer ou final int out_sz = outBuffer.remaining(); long rans0, rans1, rans2, rans7; - inBuffer.order(ByteOrder.LITTLE_ENDIAN); rans0 = inBuffer.getInt(); rans1 = inBuffer.getInt(); rans2 = inBuffer.getInt(); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 20331f9c69..0d962baf82 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -11,17 +11,12 @@ import java.nio.ByteOrder; public class RANS4x8Encode extends RANSEncode { - private static final int ORDER_BYTE_LENGTH = 1; - private static final int COMPRESSED_BYTE_LENGTH = 4; - private static final int RAW_BYTE_LENGTH = 4; - private static final int PREFIX_BYTE_LENGTH = ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH + RAW_BYTE_LENGTH; // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, // so always use ORDER-0 private static final int MINIMUM__ORDER_1_SIZE = 4; private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); - public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; @@ -45,23 +40,22 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params } private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { - final int inSize = inBuffer.remaining(); - final ByteBuffer outBuffer = allocateOutputBuffer(inSize); + final int inputSize = inBuffer.remaining(); + final ByteBuffer outBuffer = allocateOutputBuffer(inputSize); // move the output buffer ahead to the start of the frequency table (we'll come back and // write the output stream prefix at the end of this method) - outBuffer.position(PREFIX_BYTE_LENGTH); // start of frequency table + outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // start of frequency table // get the normalised frequencies of the alphabets - final int[] F = calcFrequenciesOrder0(inBuffer); + final int[] normalizedFreq = calcFrequenciesOrder0(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - buildSymsOrder0(F); - + buildSymsOrder0(normalizedFreq); final ByteBuffer cp = outBuffer.slice(); // write Frequency table - final int frequencyTableSize = writeFrequenciesOrder0(cp, F); + final int frequencyTableSize = writeFrequenciesOrder0(cp, normalizedFreq); inBuffer.rewind(); @@ -108,7 +102,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { inBuffer.position(inBuffer.limit()); // write the prefix at the beginning of the output buffer - writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inSize, frequencyTableSize, cdata_size); + writeCompressionPrefix(RANSParams.ORDER.ZERO, outBuffer, inputSize, frequencyTableSize, cdata_size); return outBuffer; } @@ -117,16 +111,16 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final ByteBuffer outBuffer = allocateOutputBuffer(inSize); // move to start of frequency - outBuffer.position(PREFIX_BYTE_LENGTH); + outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // get normalized frequencies - final int[][] F = calcFrequenciesOrder1(inBuffer); + final int[][] normalizedFreq = calcFrequenciesOrder1(inBuffer); // using the normalised frequencies, set the RANSEncodingSymbols - buildSymsOrder1(F); + buildSymsOrder1(normalizedFreq); final ByteBuffer cp = outBuffer.slice(); - final int frequencyTableSize = writeFrequenciesOrder1(cp, F); + final int frequencyTableSize = writeFrequenciesOrder1(cp, normalizedFreq); inBuffer.rewind(); final int in_size = inBuffer.remaining(); long rans0, rans1, rans2, rans3; @@ -214,16 +208,16 @@ private static void writeCompressionPrefix( final int frequencyTableSize, final int compressedBlobSize) { ValidationUtils.validateArg(order == RANSParams.ORDER.ONE || order == RANSParams.ORDER.ZERO,"unrecognized RANS order"); - outBuffer.limit(PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize); + outBuffer.limit(Constants.RANS_4x8_PREFIX_BYTE_LENGTH + frequencyTableSize + compressedBlobSize); // go back to the beginning of the stream and write the prefix values // write the (ORDER as a single byte at offset 0) outBuffer.put(0, (byte) (order == RANSParams.ORDER.ZERO ? 0 : 1)); outBuffer.order(ByteOrder.LITTLE_ENDIAN); // move past the ORDER and write the compressed size - outBuffer.putInt(ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize); + outBuffer.putInt(Constants.RANS_4x8_ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize); // move past the compressed size and write the uncompressed size - outBuffer.putInt(ORDER_BYTE_LENGTH + COMPRESSED_BYTE_LENGTH, inSize); + outBuffer.putInt(Constants.RANS_4x8_ORDER_BYTE_LENGTH + Constants.RANS_4x8_COMPRESSED_BYTE_LENGTH, inSize); outBuffer.rewind(); } @@ -333,36 +327,6 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { return F; } - private void buildSymsOrder0(final int[] F) { - final RANSEncodingSymbol[] encodingSymbols = getEncodingSymbols()[0]; - - // T = running sum of frequencies including the current symbol - // F[j] = frequency of symbol "j" - // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") - int cumulativeFreq = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] != 0) { - //For each symbol, set start = cumulative frequency and freq = frequency - encodingSymbols[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F[j]; - } - } - } - - private void buildSymsOrder1(final int[][] F) { - final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - final int[] F_i_ = F[i]; - int cumulativeFreq = 0; - for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { - if (F_i_[symbol] != 0) { - encodingSymbols[i][symbol].set(cumulativeFreq, F_i_[symbol], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F_i_[symbol]; - } - } - } - } - private static int writeFrequenciesOrder0(final ByteBuffer cp, final int[] F) { final int start = cp.position(); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index e493dd5364..afc0eee3ba 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -197,7 +197,6 @@ private void uncompressOrder1WayN( final int shift = frequencyTableFirstByte >> 4; readFrequencyTableOrder1(freqTableSource, shift); final int outputSize = outBuffer.remaining(); - inBuffer.order(ByteOrder.LITTLE_ENDIAN); // Nway parallel rans states. Nway = 4 or 32 final int Nway = ransNx16Params.getNumInterleavedRANSStates(); @@ -460,21 +459,21 @@ private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ // Decode the compressed interleaved stream final int[] uncompressedLengths = new int[numInterleaveStreams]; - final ByteBuffer[] TransposedData = new ByteBuffer[numInterleaveStreams]; + final ByteBuffer[] transposedData = new ByteBuffer[numInterleaveStreams]; for ( int j=0; j j){ uncompressedLengths[j]++; } - TransposedData[j] = uncompress(inBuffer, uncompressedLengths[j]); + transposedData[j] = uncompress(inBuffer, uncompressedLengths[j]); } // Transpose final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); for (int j = 0; j > 2) : (inputSize >> 5); int remainingSize = inputSize - (interleaveSize * Nway); int reverseIndex = 1; + final long[] rans = new long[Nway]; + // initialize rans states + for (int r=0; r0){ // encode remaining elements first - int remainingSymbol =0xFF & inBuffer.get(inputSize - reverseIndex); + int remainingSymbol = 0xFF & inBuffer.get(inputSize - reverseIndex); rans[remainingSize - 1] = ransEncodingSymbols[remainingSymbol].putSymbolNx16(rans[remainingSize - 1], ptr); remainingSize --; reverseIndex ++; @@ -244,8 +243,8 @@ private void compressOrder1WayN ( // normalise frequencies with a constant shift Utils.normaliseFrequenciesOrder1Shift(frequencies, Constants.TOTAL_FREQ_SHIFT); - // set encoding symbol - buildSymsOrder1(frequencies); // TODO: move into utils + // using the normalised frequencies, set the RANSEncodingSymbols + buildSymsOrder1(frequencies); // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 final int Nway = ransNx16Params.getNumInterleavedRANSStates(); @@ -469,39 +468,6 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) { cp.put((byte) 0); } - private void buildSymsOrder0(final int[] F) { - - // updates all the encodingSymbols - final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; - - // F[j] = frequency of symbol "j" - // cumulativeFreq = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j") - int cumulativeFreq = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F[j] != 0) { - - //For each symbol, set start = cumulative frequency and freq = frequency - syms[j].set(cumulativeFreq, F[j], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F[j]; - } - } - } - - private void buildSymsOrder1(final int[][] F) { - // TODO: Call buildSymsOrder0 from buildSymsOrder1 - final RANSEncodingSymbol[][] encodingSymbols = getEncodingSymbols(); - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - final int[] F_i_ = F[i]; - int cumulativeFreq = 0; - for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - if (F_i_[j] != 0) { - encodingSymbols[i][j].set(cumulativeFreq, F_i_[j], Constants.TOTAL_FREQ_SHIFT); - cumulativeFreq += F_i_[j]; - } - } - } - } - private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuffer){ // Find the symbols that benefit from RLE, i.e, the symbols that occur more than 2 times in succession. diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 0ba31a4c4b..78ce092ff5 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -186,7 +186,7 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > 10); Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); - Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4); + Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH); Assert.assertEquals(compressed.getInt(), rawSize); } @@ -234,7 +234,7 @@ public void testRans4x8Header( // first byte of compressed data gives the order Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); // the next 4 bytes gives the compressed size - Assert.assertEquals(compressed.getInt(), compressed.limit() - 9); + Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // the next 4 bytes gives the uncompressed size Assert.assertEquals(compressed.getInt(), rawData.limit()); } From e6b06a533546af4f42bb8791a9153c2360b5ff40 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 6 Dec 2023 17:36:06 -0500 Subject: [PATCH 58/76] Addressing the feedback from Nov 7 and Nov 20 - part 3 --- .../cram/compression/rans/Constants.java | 1 - .../compression/rans/rans4x8/RANS4x8Encode.java | 4 ++-- .../rans/ransnx16/RANSNx16Decode.java | 11 +++++++++-- .../rans/ransnx16/RANSNx16Encode.java | 16 ++++++++++------ .../samtools/cram/compression/rans/RansTest.java | 6 ++++++ 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java index 3ae46a76a1..f970582f48 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Constants.java @@ -5,7 +5,6 @@ final public class Constants { public static final int TOTAL_FREQ = (1 << TOTAL_FREQ_SHIFT); // 4096 public static final int NUMBER_OF_SYMBOLS = 256; public static final int RANS_4x8_LOWER_BOUND = 1 << 23; - public static final int RANS_4x8_NUM_INTERLEAVED_STREAMS = 4; public static final int RANS_4x8_ORDER_BYTE_LENGTH = 1; public static final int RANS_4x8_COMPRESSED_BYTE_LENGTH = 4; public static final int RANS_4x8_RAW_BYTE_LENGTH = 4; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 0d962baf82..827fd4b0c7 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -14,7 +14,7 @@ public class RANS4x8Encode extends RANSEncode { // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, // so always use ORDER-0 - private static final int MINIMUM__ORDER_1_SIZE = 4; + private static final int MINIMUM_ORDER_1_SIZE = 4; private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params) { @@ -22,7 +22,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params return EMPTY_BUFFER; } initializeRANSEncoder(); - if (inBuffer.remaining() < MINIMUM__ORDER_1_SIZE) { + if (inBuffer.remaining() < MINIMUM_ORDER_1_SIZE) { // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 return compressOrder0Way4(inBuffer); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index afc0eee3ba..62fc3a911d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -166,7 +166,6 @@ private void uncompressOrder1WayN( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final RANSNx16Params ransNx16Params) { - initializeRANSDecoder(); // read the first byte final int frequencyTableFirstByte = (inBuffer.get() & 0xFF); @@ -189,11 +188,19 @@ private void uncompressOrder1WayN( freqTableSource = ByteBuffer.allocate(uncompressedLength); final ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); compressedFrequencyTableBuffer.order(ByteOrder.LITTLE_ENDIAN); - uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,new RANSNx16Params(0x00)); // format flags = 0 + + // Uncompress using RANSNx16 Order 0, Nway = 4. + // formatFlags = (~RANSNx16Params.ORDER_FLAG_MASK & ~RANSNx16Params.N32_FLAG_MASK) = ~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK) + uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,new RANSNx16Params(~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK))); // format flags = 0 } else { freqTableSource = inBuffer; } + + // Moving initializeRANSDecoder() from the beginning of this method to this point in the code + // due to the nested call to uncompressOrder0WayN, which also invokes the initializeRANSDecoder() method. + // TODO: we should work on a more permanent solution for this issue! + initializeRANSDecoder(); final int shift = frequencyTableFirstByte >> 4; readFrequencyTableOrder1(freqTableSource, shift); final int outputSize = outBuffer.remaining(); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index ecdfa8801f..cd6ba4c3ee 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -16,7 +16,6 @@ public class RANSNx16Encode extends RANSEncode { ///////////////////////////////////////////////////////////////////////////////////////////////// private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); - private static final int MINIMUM__ORDER_1_SIZE = 4; public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params) { if (inBuffer.remaining() == 0) { @@ -78,8 +77,8 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN return outBuffer; } - // if after encoding pack and rle, the inputBuffer size < 4, then use order 0 - if (inputBuffer.remaining() < MINIMUM__ORDER_1_SIZE && ransNx16Params.getOrder() == RANSParams.ORDER.ONE) { + // if after encoding pack and rle, the inputBuffer size < Nway, then use order 0 + if (inputBuffer.remaining() < ransNx16Params.getNumInterleavedRANSStates() && ransNx16Params.getOrder() == RANSParams.ORDER.ONE) { // set order flag to "0" in the first byte of the outBuffer outBuffer.put(0,(byte)(outBuffer.get(0) & ~RANSNx16Params.ORDER_FLAG_MASK)); @@ -191,7 +190,6 @@ private void compressOrder1WayN ( final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { - initializeRANSEncoder(); final int[][] frequencies = buildFrequenciesOrder1(inBuffer, ransNx16Params.getNumInterleavedRANSStates()); // normalise frequencies with a variable shift calculated @@ -208,9 +206,15 @@ private void compressOrder1WayN ( frequencyTable.limit(uncompressedFrequencyTableSize); frequencyTable.rewind(); - // compressed frequency table using RANS Nx16 Order 0 - compressOrder0WayN(frequencyTable, new RANSNx16Params(0x00), compressedFrequencyTable); + // Compress using RANSNx16 Order 0, Nway = 4. + // formatFlags = (~RANSNx16Params.ORDER_FLAG_MASK & ~RANSNx16Params.N32_FLAG_MASK) = ~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK) + compressOrder0WayN(frequencyTable, new RANSNx16Params(~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK)), compressedFrequencyTable); frequencyTable.rewind(); + + // Moving initializeRANSEncoder() from the beginning of this method to this point in the code + // due to the nested call to compressOrder0WayN, which also invokes the initializeRANSEncoder() method. + // TODO: we should work on a more permanent solution for this issue! + initializeRANSEncoder(); final int compressedFrequencyTableSize = compressedFrequencyTable.limit(); final ByteBuffer cp = outBuffer.slice(); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 78ce092ff5..dd63e45c2a 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -51,6 +51,8 @@ public Object[][] getRansTestData() { { new TestDataEnvelope(new byte[] {0, 1}) }, { new TestDataEnvelope(new byte[] {0, 1, 2}) }, { new TestDataEnvelope(new byte[] {0, 1, 2, 3}) }, + { new TestDataEnvelope(new byte[] {1, 2, 3, 4}) }, + { new TestDataEnvelope(new byte[] {1, 2, 3, 4, 5}) }, { new TestDataEnvelope(new byte[1000]) }, { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> (byte) 1)) }, { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> Byte.MIN_VALUE)) }, @@ -58,6 +60,10 @@ public Object[][] getRansTestData() { { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> (byte) index.intValue())) }, { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> index < n / 2 ? (byte) 0 : (byte) 1)) }, { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> index < n % 2 ? (byte) 0 : (byte) 1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(10, 0.1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(31, 0.1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(32, 0.1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(33, 0.1)) }, { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.1)) }, { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01)) }, { new TestDataEnvelope(randomBytesFromGeometricDistribution(10 * 1000 * 1000 + 1, 0.01)) }, From 1a89cb4b530a72aa8e7c94fac4f87f1f77872684 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 13 Dec 2023 15:05:32 -0500 Subject: [PATCH 59/76] Addressing the feedback from Nov 7 and Nov 20 - part 4 --- .../cram/compression/rans/RANSDecode.java | 3 + .../cram/compression/rans/RANSEncode.java | 3 + .../rans/rans4x8/RANS4x8Decode.java | 3 + .../rans/rans4x8/RANS4x8Encode.java | 3 + .../rans/ransnx16/RANSNx16Decode.java | 30 ++++--- .../rans/ransnx16/RANSNx16Encode.java | 15 ++-- .../htsjdk/samtools/cram/RANSInteropTest.java | 87 +++++++++++++++++-- .../cram/compression/rans/RansTest.java | 80 +++++------------ 8 files changed, 139 insertions(+), 85 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java index d20826f12e..154cfa9614 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecode.java @@ -15,6 +15,9 @@ protected RANSDecodingSymbol[][] getDecodingSymbols() { return decodingSymbols; } + // This method assumes that inBuffer is already rewound. + // It uncompresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the uncompressed data. public abstract ByteBuffer uncompress(final ByteBuffer inBuffer); // Lazy initialization of working memory for the decoder diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index d6763ed6f6..5a441228bf 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -12,6 +12,9 @@ protected RANSEncodingSymbol[][] getEncodingSymbols() { return encodingSymbols; } + // This method assumes that inBuffer is already rewound. + // It compresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the compressed data. public abstract ByteBuffer compress(final ByteBuffer inBuffer, final T params); // Lazy initialization of working memory for the encoder diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 5f93ae58c4..e618f0dcd5 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -17,6 +17,9 @@ public class RANS4x8Decode extends RANSDecode { private static final int RAW_BYTE_LENGTH = 4; private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + // This method assumes that inBuffer is already rewound. + // It uncompresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the uncompressed data. public ByteBuffer uncompress(final ByteBuffer inBuffer) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 827fd4b0c7..d8726a47dc 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -17,6 +17,9 @@ public class RANS4x8Encode extends RANSEncode { private static final int MINIMUM_ORDER_1_SIZE = 4; private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + // This method assumes that inBuffer is already rewound. + // It compresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the compressed data. public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 62fc3a911d..4970c999f5 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -15,6 +15,9 @@ public class RANSNx16Decode extends RANSDecode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; + // This method assumes that inBuffer is already rewound. + // It uncompresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the uncompressed data. public ByteBuffer uncompress(final ByteBuffer inBuffer) { // For RANS decoding, the bytes are read in little endian from the input stream @@ -61,16 +64,15 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { } // if rle, get rle metadata, which will be used later to decode rle - final int uncompressedRLEMetaDataLength; int uncompressedRLEOutputLength = 0; - final int[] rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; + int[] rleSymbols = null; ByteBuffer uncompressedRLEMetaData = null; if (ransNx16Params.isRLE()) { - uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); + rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; + final int uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); uncompressedRLEOutputLength = uncompressedSize; uncompressedSize = Utils.readUint7(inBuffer); - // TODO: maybe move decodeRLEMeta in-line - uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols); + uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols, ransNx16Params); } ByteBuffer outBuffer = ByteBuffer.allocate(uncompressedSize);; @@ -95,12 +97,12 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { } // if rle, then decodeRLE - if (ransNx16Params.isRLE() && uncompressedRLEMetaData != null) { + if (ransNx16Params.isRLE()) { outBuffer = decodeRLE(outBuffer, rleSymbols, uncompressedRLEMetaData, uncompressedRLEOutputLength); } // if pack, then decodePack - if (ransNx16Params.isPack() && packMappingTable.length > 0) { + if (ransNx16Params.isPack()) { outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } return outBuffer; @@ -189,7 +191,7 @@ private void uncompressOrder1WayN( final ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); compressedFrequencyTableBuffer.order(ByteOrder.LITTLE_ENDIAN); - // Uncompress using RANSNx16 Order 0, Nway = 4. + // uncompress using RANSNx16 Order 0, Nway = 4 // formatFlags = (~RANSNx16Params.ORDER_FLAG_MASK & ~RANSNx16Params.N32_FLAG_MASK) = ~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK) uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,new RANSNx16Params(~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK))); // format flags = 0 } @@ -355,7 +357,8 @@ private static int[] readAlphabet(final ByteBuffer cp){ private ByteBuffer decodeRLEMeta( final ByteBuffer inBuffer, final int uncompressedRLEMetaDataLength, - final int[] rleSymbols) { + final int[] rleSymbols, + final RANSNx16Params ransNx16Params) { final ByteBuffer uncompressedRLEMetaData; if ((uncompressedRLEMetaDataLength & 0x01)!=0) { final byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; @@ -368,9 +371,12 @@ private ByteBuffer decodeRLEMeta( final ByteBuffer compressedRLEMetaData = ByteBuffer.wrap(compressedRLEMetaDataArray); compressedRLEMetaData.order(ByteOrder.LITTLE_ENDIAN); uncompressedRLEMetaData = ByteBuffer.allocate(uncompressedRLEMetaDataLength / 2); - - // TODO: get Nway from ransParams and use N to uncompress - uncompressOrder0WayN(compressedRLEMetaData,uncompressedRLEMetaData, uncompressedRLEMetaDataLength / 2, new RANSNx16Params(0x00)); // N should come from the prev step + // uncompress using Order 0 and N = Nway + uncompressOrder0WayN( + compressedRLEMetaData, + uncompressedRLEMetaData, + uncompressedRLEMetaDataLength / 2, + new RANSNx16Params(0x00 | ransNx16Params.getFormatFlags() & RANSNx16Params.N32_FLAG_MASK)); } int numRLESymbols = uncompressedRLEMetaData.get() & 0xFF; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index cd6ba4c3ee..9ddc6373a2 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -17,6 +17,9 @@ public class RANSNx16Encode extends RANSEncode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + // This method assumes that inBuffer is already rewound. + // It compresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the compressed data. public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; @@ -65,10 +68,9 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // RLE if (ransNx16Params.isRLE()){ - inputBuffer = encodeRLE(inputBuffer, outBuffer); + inputBuffer = encodeRLE(inputBuffer, outBuffer, ransNx16Params); } - if (ransNx16Params.isCAT()) { // Data is uncompressed outBuffer.put(inputBuffer); @@ -472,7 +474,7 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) { cp.put((byte) 0); } - private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuffer){ + private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuffer, final RANSNx16Params ransNx16Params){ // Find the symbols that benefit from RLE, i.e, the symbols that occur more than 2 times in succession. // spec: For symbols that occur many times in succession, we can replace them with a single symbol and a count. @@ -548,11 +550,8 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // compress the rleMetaData Buffer final ByteBuffer compressedRleMetaData = allocateOutputBuffer(rleMetaData.remaining()); - // TODO: Nway? Check other places as well -> How to setInterleaveSize? - can i do it by changing formatflags? - // // Compress lengths with O0 and literals with O0/O1 ("order" param) - // TODO: get Nway from ransParams and use N to uncompress - - compressOrder0WayN(rleMetaData, new RANSNx16Params(0x00),compressedRleMetaData); + // compress using Order 0 and N = Nway + compressOrder0WayN(rleMetaData, new RANSNx16Params(0x00 | ransNx16Params.getFormatFlags() & RANSNx16Params.N32_FLAG_MASK),compressedRleMetaData); // write to compressedRleMetaData to outBuffer Utils.writeUint7(rleMetaData.limit()*2, outBuffer); diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index f34014b7cd..1616f28c7f 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -40,8 +40,62 @@ public class RANSInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANS4X8_DIR = "r4x8"; public static final String COMPRESSED_RANSNX16_DIR = "r4x16"; - // RANS4x8 codecs and testdata - public Object[][] get4x8TestCases() throws IOException { + // enumerates the different flag combinations + public Object[][] get4x8RoundTripTestCases() throws IOException { + + // params: + // uncompressed testfile path, + // RANS encoder, RANS decoder, RANS params + final List rans4x8ParamsOrderList = Arrays.asList( + RANSParams.ORDER.ZERO, + RANSParams.ORDER.ONE); + final List testCases = new ArrayList<>(); + getInteropRawTestFiles() + .forEach(path -> + rans4x8ParamsOrderList.stream().map(rans4x8ParamsOrder -> new Object[]{ + path, + new RANS4x8Encode(), + new RANS4x8Decode(), + new RANS4x8Params(rans4x8ParamsOrder) + }).forEach(testCases::add)); + return testCases.toArray(new Object[][]{}); + } + + // enumerates the different flag combinations + public Object[][] getNx16RoundTripTestCases() throws IOException { + + // params: + // uncompressed testfile path, + // RANS encoder, RANS decoder, RANS params + final List ransNx16ParamsFormatFlagList = Arrays.asList( + 0x00, + RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.N32_FLAG_MASK, + RANSNx16Params.N32_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.PACK_FLAG_MASK, + RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); + final List testCases = new ArrayList<>(); + getInteropRawTestFiles() + .forEach(path -> + ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ + path, + new RANSNx16Encode(), + new RANSNx16Decode(), + new RANSNx16Params(ransNx16ParamsFormatFlag) + }).forEach(testCases::add)); + return testCases.toArray(new Object[][]{}); + } + + // uses the available compressed interop test files + public Object[][] get4x8DecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, @@ -60,8 +114,8 @@ public Object[][] get4x8TestCases() throws IOException { return testCases.toArray(new Object[][]{}); } - // RANSNx16 codecs and testdata - public Object[][] getNx16TestCases() throws IOException { + // uses the available compressed interop test files + public Object[][] getNx16DecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, @@ -86,7 +140,17 @@ public Object[][] getRoundTripTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, // RANS encoder, RANS decoder, RANS params - return Stream.concat(Arrays.stream(get4x8TestCases()), Arrays.stream(getNx16TestCases())) + return Stream.concat(Arrays.stream(get4x8RoundTripTestCases()), Arrays.stream(getNx16RoundTripTestCases())) + .toArray(Object[][]::new); + } + + @DataProvider(name = "decodeOnlyTestCases") + public Object[][] getDecodeOnlyTestCases() throws IOException { + + // params: + // compressed testfile path, uncompressed testfile path, + // RANS encoder, RANS decoder, RANS params + return Stream.concat(Arrays.stream(get4x8DecodeOnlyTestCases()), Arrays.stream(getNx16DecodeOnlyTestCases())) .toArray(Object[][]::new); } @@ -103,7 +167,6 @@ public void testHtsCodecsCorpusIsAvailable() { dataProvider = "roundTripTestCases", description = "Roundtrip using htsjdk RANS. Compare the output with the original file" ) public void testRANSRoundTrip( - final Path unusedcompressedFilePath, final Path uncompressedFilePath, final RANSEncode ransEncode, final RANSDecode ransDecode, @@ -129,7 +192,7 @@ public void testRANSRoundTrip( @Test ( dependsOnMethods = "testHtsCodecsCorpusIsAvailable", - dataProvider = "roundTripTestCases", + dataProvider = "decodeOnlyTestCases", description = "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.") public void testDecodeOnly( final Path compressedFilePath, @@ -158,4 +221,14 @@ public void testDecodeOnly( } } + // return a list of all raw test files in the htscodecs/tests/dat directory + private List getInteropRawTestFiles() throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), + path -> (Files.isRegularFile(path)) && !Files.isHidden(path)) + .forEach(path -> paths.add(path)); + return paths; + } + } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index dd63e45c2a..559ba77ffe 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -190,7 +190,7 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final int rawSize = 1001; final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params); - Assert.assertTrue(compressed.limit() > 10); + Assert.assertTrue(compressed.limit() > Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // minimum prefix len when input is not Empty Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH); Assert.assertEquals(compressed.getInt(), rawSize); @@ -204,21 +204,23 @@ public void testRansNx16BuffersMeetBoundaryExpectations( final int rawSize = 1001; final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode,ransDecode,params); - Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty - final int FormatFlags = compressed.get(); // first byte of compressed data is the formatFlags rawData.rewind(); - final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; + Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty + final int FormatFlags = compressed.get() & 0xFF; // first byte of compressed data is the formatFlags + final int[] frequencies = new int[Constants.NUMBER_OF_SYMBOLS]; final int inSize = rawData.remaining(); for (int i = 0; i < inSize; i ++) { - F[rawData.get(i) & 0xFF]++; + frequencies[rawData.get(i) & 0xFF]++; } int numSym = 0; for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - if (F[i]>0) { + if (frequencies[i]>0) { numSym++; } } if (params.isPack() & (numSym == 0 | numSym > 16)) { + // In the encoder, Packing is skipped if numSymbols = 0 or numSymbols > 16 + // and the Pack flag is unset in the formatFlags Assert.assertEquals(FormatFlags, params.getFormatFlags() & ~RANSNx16Params.PACK_FLAG_MASK); } else { Assert.assertEquals(FormatFlags, params.getFormatFlags()); @@ -229,54 +231,6 @@ public void testRansNx16BuffersMeetBoundaryExpectations( } } - @Test(dataProvider = "rans4x8") - public void testRans4x8Header( - final RANS4x8Encode ransEncode, - final RANS4x8Decode unused, - final RANS4x8Params params) { - final int rawSize = 1000; - final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); - final ByteBuffer compressed = ransEncode.compress(rawData, params); - // first byte of compressed data gives the order - Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); - // the next 4 bytes gives the compressed size - Assert.assertEquals(compressed.getInt(), compressed.limit() - Constants.RANS_4x8_PREFIX_BYTE_LENGTH); - // the next 4 bytes gives the uncompressed size - Assert.assertEquals(compressed.getInt(), rawData.limit()); - } - - @Test(dataProvider = "ransNx16") - public void testRansNx16Header( - final RANSNx16Encode ransEncode, - final RANSNx16Decode unused, - final RANSNx16Params params) { - final int size = 1000; - final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final ByteBuffer compressed = ransEncode.compress(rawData, params); - rawData.rewind(); - final int FormatFlags = compressed.get() & 0xFF; // first byte of compressed data is the formatFlags - final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; - final int inSize = rawData.remaining(); - for (int i = 0; i < inSize; i ++) { - F[rawData.get(i) & 0xFF]++; - } - int numSym = 0; - for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - if (F[i]>0) { - numSym++; - } - } - if (params.isPack() & (numSym == 0 | numSym > 16)) { - Assert.assertEquals(FormatFlags, (byte) (params.getFormatFlags() & ~RANSNx16Params.PACK_FLAG_MASK)); - } else { - Assert.assertEquals(FormatFlags, (byte) params.getFormatFlags()); - } - // if nosz flag is not set, then the uncompressed size is recorded - if (!params.isNosz()){ - Assert.assertEquals(Utils.readUint7(compressed), size); - } - } - @Test(dataProvider="allRansAndData") public void testRoundTrip( final RANSEncode ransEncode, @@ -301,7 +255,17 @@ public void testRansNx16RejectEncodeStripe( ransEncode.compress(ByteBuffer.wrap(td.testArray), params); } - // TODO: Add Test to DecodePack with nsym > 16 + @Test( + description = "RANSNx16 Decoding with Pack Flag if (numSymbols > 16 or numSymbols==0) " + + "should throw CRAMException", + expectedExceptions = { CRAMException.class }, + expectedExceptionsMessageRegExp = "Bit Packing is not permitted when number " + + "of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: 0") + public void testRANSNx16RejectDecodePack(){ + final ByteBuffer compressedData = ByteBuffer.wrap(new byte[]{(byte) RANSNx16Params.PACK_FLAG_MASK, (byte) 0x00, (byte) 0x00}); + final RANSNx16Decode ransDecode = new RANSNx16Decode(); + ransDecode.uncompress(compressedData); + } private static void ransRoundTrip( final RANSEncode ransEncode, @@ -315,7 +279,7 @@ private static void ransRoundTrip( } public ByteBuffer ransBufferMeetBoundaryExpectations( - final int size, + final int rawSize, final ByteBuffer raw, final RANSEncode ransEncode, final RANSDecode ransDecode, @@ -325,10 +289,10 @@ public ByteBuffer ransBufferMeetBoundaryExpectations( final ByteBuffer uncompressed = ransDecode.uncompress(compressed); Assert.assertFalse(compressed.hasRemaining()); compressed.rewind(); - Assert.assertEquals(uncompressed.limit(), size); + Assert.assertEquals(uncompressed.limit(), rawSize); Assert.assertEquals(uncompressed.position(), 0); Assert.assertFalse(raw.hasRemaining()); - Assert.assertEquals(raw.limit(), size); + Assert.assertEquals(raw.limit(), rawSize); Assert.assertEquals(compressed.position(), 0); return compressed; } From f4fd67cee0deb036ebd473ffa504b1b76349d095 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 18 Dec 2023 17:05:45 -0500 Subject: [PATCH 60/76] Addressing the feedback from Nov 7 and Nov 20 - part 5 --- .../compression/rans/ArithmeticDecoder.java | 9 ++-- .../cram/compression/rans/RANSEncode.java | 9 ++-- .../samtools/cram/compression/rans/Utils.java | 8 +-- .../rans/rans4x8/RANS4x8Decode.java | 40 +++++++------- .../rans/ransnx16/RANSNx16Decode.java | 52 +++++++++---------- .../rans/ransnx16/RANSNx16Encode.java | 6 +-- .../cram/compression/rans/RansTest.java | 17 +++--- 7 files changed, 65 insertions(+), 76 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java index 9ae5512892..bfc7f33795 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java @@ -25,23 +25,20 @@ package htsjdk.samtools.cram.compression.rans; final public class ArithmeticDecoder { - public final int[] freq = new int[Constants.NUMBER_OF_SYMBOLS]; - public final int[] cumulativeFreq = new int[Constants.NUMBER_OF_SYMBOLS]; + public final int[] frequencies = new int[Constants.NUMBER_OF_SYMBOLS]; // reverse lookup table public final byte[] reverseLookup = new byte[Constants.TOTAL_FREQ]; public ArithmeticDecoder() { for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - freq[i] = 0; - cumulativeFreq[i] = 0; + frequencies[i] = 0; } } public void reset() { for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - freq[i] = 0; - cumulativeFreq[i] = 0; + frequencies[i] = 0; } for (int i = 0; i < Constants.TOTAL_FREQ; i++) { reverseLookup[i] = 0; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index 5a441228bf..02c923e631 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -36,12 +36,11 @@ protected void initializeRANSEncoder() { } protected ByteBuffer allocateOutputBuffer(final int inSize) { - // TODO: This should vary depending on the RANS type and order // This calculation is identical to the one in samtools rANS_static.c - // Presumably the frequency table (always big enough for order 1) = 257*257, then * 3 for each entry - // (byte->symbol, 2 bytes -> scaled frequency), + 9 for the header (order byte, and 2 int lengths - // for compressed/uncompressed lengths) ? Plus additional 5% for..., for what ??? - final int compressedSize = (int) (1.05 * inSize + 257 * 257 * 3 + 9); + // Presumably the frequency table (always big enough for order 1) = 257*257, + // then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency), + // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). + final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize); if (outputBuffer.remaining() < compressedSize) { throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index e4160a1612..35f3c4dec3 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -157,11 +157,11 @@ public static void normaliseFrequenciesOrder1(final int[][] F, final int shift) } } - public static void normaliseFrequenciesOrder0Shift(final int[] F, final int bits){ + public static void normaliseFrequenciesOrder0Shift(final int[] frequencies, final int bits){ // compute total frequency int totalFrequency = 0; - for (int freq : F) { + for (int freq : frequencies) { totalFrequency += freq; } if (totalFrequency == 0 || totalFrequency == (1<= 0x80) { - decoder.freq[j] &= ~0x80; - decoder.freq[j] = ((decoder.freq[j] & 0x7F) << 8) | (cp.get() & 0xFF); + if ((decoder.frequencies[j] = (cp.get() & 0xFF)) >= 0x80) { + decoder.frequencies[j] &= ~0x80; + decoder.frequencies[j] = ((decoder.frequencies[j] & 0x7F) << 8) | (cp.get() & 0xFF); } - decoder.cumulativeFreq[j] = x; - decodingSymbols[j].set(decoder.cumulativeFreq[j], decoder.freq[j]); + decodingSymbols[j].set(cumulativeFrequency, decoder.frequencies[j]); /* Build reverse lookup table */ - Arrays.fill(decoder.reverseLookup, x, x + decoder.freq[j], (byte) j); + Arrays.fill(decoder.reverseLookup, cumulativeFrequency, cumulativeFrequency + decoder.frequencies[j], (byte) j); - x += decoder.freq[j]; + cumulativeFrequency += decoder.frequencies[j]; if (rle == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { j = cp.get() & 0xFF; @@ -219,7 +218,7 @@ private void readStatsOrder0(final ByteBuffer cp) { } } while (j != 0); - assert (x <= Constants.TOTAL_FREQ); + assert (cumulativeFrequency <= Constants.TOTAL_FREQ); } private void readStatsOrder1(final ByteBuffer cp) { @@ -229,29 +228,28 @@ private void readStatsOrder1(final ByteBuffer cp) { int i = 0xFF & cp.get(); do { int rle_j = 0; - int x = 0; + int cumulativeFrequency = 0; int j = 0xFF & cp.get(); do { - if ((D[i].freq[j] = (0xFF & cp.get())) >= 0x80) { - D[i].freq[j] &= ~0x80; - D[i].freq[j] = ((D[i].freq[j] & 0x7F) << 8) | (0xFF & cp.get()); + if ((D[i].frequencies[j] = (0xFF & cp.get())) >= 0x80) { + D[i].frequencies[j] &= ~0x80; + D[i].frequencies[j] = ((D[i].frequencies[j] & 0x7F) << 8) | (0xFF & cp.get()); } - D[i].cumulativeFreq[j] = x; - if (D[i].freq[j] == 0) { - D[i].freq[j] = Constants.TOTAL_FREQ; + if (D[i].frequencies[j] == 0) { + D[i].frequencies[j] = Constants.TOTAL_FREQ; } decodingSymbols[i][j].set( - D[i].cumulativeFreq[j], - D[i].freq[j] + cumulativeFrequency, + D[i].frequencies[j] ); /* Build reverse lookup table */ - Arrays.fill(D[i].reverseLookup, x, x + D[i].freq[j], (byte) j); + Arrays.fill(D[i].reverseLookup, cumulativeFrequency, cumulativeFrequency + D[i].frequencies[j], (byte) j); - x += D[i].freq[j]; - assert (x <= Constants.TOTAL_FREQ); + cumulativeFrequency += D[i].frequencies[j]; + assert (cumulativeFrequency <= Constants.TOTAL_FREQ); if (rle_j == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { j = (0xFF & cp.get()); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 4970c999f5..ce30da803f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -14,6 +14,7 @@ public class RANSNx16Decode extends RANSDecode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; + private static final int RLE_META_OPTIONALLY_COMPRESSED_MASK = 0x01; // This method assumes that inBuffer is already rewound. // It uncompresses the data in the inBuffer, leaving it consumed. @@ -45,7 +46,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // if pack, get pack metadata, which will be used later to decode packed data int packDataLength = 0; int numSymbols = 0; - int[] packMappingTable = new int[0]; + int[] packMappingTable = null; if (ransNx16Params.isPack()) { packDataLength = uncompressedSize; numSymbols = inBuffer.get() & 0xFF; @@ -75,13 +76,14 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols, ransNx16Params); } - ByteBuffer outBuffer = ByteBuffer.allocate(uncompressedSize);; + ByteBuffer outBuffer; // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { - byte[] data = new byte[uncompressedSize]; + final byte[] data = new byte[uncompressedSize]; inBuffer.get(data, 0, uncompressedSize); outBuffer = ByteBuffer.wrap(data); } else { + outBuffer = ByteBuffer.allocate(uncompressedSize); if (uncompressedSize != 0) { switch (ransNx16Params.getOrder()) { case ZERO: @@ -258,32 +260,30 @@ private void readFrequencyTableOrder0( // and Reverse Lookup table final int[] alphabet = readAlphabet(cp); - final int[] frequencies = new int[Constants.NUMBER_OF_SYMBOLS]; + final ArithmeticDecoder decoder = getD()[0]; - // read frequencies, normalise frequencies then calculate C and R + // read frequencies, normalise frequencies for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (alphabet[j] > 0) { - if ((frequencies[j] = (cp.get() & 0xFF)) >= 0x80){ - frequencies[j] &= ~0x80; - frequencies[j] = (( frequencies[j] &0x7f) << 7) | (cp.get() & 0x7F); + if ((decoder.frequencies[j] = (cp.get() & 0xFF)) >= 0x80){ + decoder.frequencies[j] &= ~0x80; + decoder.frequencies[j] = (( decoder.frequencies[j] &0x7f) << 7) | (cp.get() & 0x7F); } } } - Utils.normaliseFrequenciesOrder0Shift(frequencies, Constants.TOTAL_FREQ_SHIFT); - final ArithmeticDecoder decoder = getD()[0]; + Utils.normaliseFrequenciesOrder0Shift(decoder.frequencies, Constants.TOTAL_FREQ_SHIFT); + final RANSDecodingSymbol[] decodingSymbols = getDecodingSymbols()[0]; int cumulativeFrequency = 0; for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if(alphabet[j]>0){ // set RANSDecodingSymbol - decoder.freq[j] = frequencies[j]; - decoder.cumulativeFreq[j] = cumulativeFrequency; - decodingSymbols[j].set(decoder.cumulativeFreq[j], decoder.freq[j]); + decodingSymbols[j].set(cumulativeFrequency, decoder.frequencies[j]); // update Reverse Lookup table - Arrays.fill(decoder.reverseLookup, cumulativeFrequency, cumulativeFrequency + decoder.freq[j], (byte) j); - cumulativeFrequency += decoder.freq[j]; + Arrays.fill(decoder.reverseLookup, cumulativeFrequency, cumulativeFrequency + decoder.frequencies[j], (byte) j); + cumulativeFrequency += decoder.frequencies[j]; } } } @@ -291,7 +291,6 @@ private void readFrequencyTableOrder0( private void readFrequencyTableOrder1( final ByteBuffer cp, final int shift) { - final int[][] frequencies = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; final ArithmeticDecoder[] D = getD(); final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols(); final int[] alphabet = readAlphabet(cp); @@ -304,8 +303,8 @@ private void readFrequencyTableOrder1( if (run > 0) { run--; } else { - frequencies[i][j] = Utils.readUint7(cp); - if (frequencies[i][j] == 0){ + D[i].frequencies[j] = Utils.readUint7(cp); + if (D[i].frequencies[j] == 0){ run = Utils.readUint7(cp); } } @@ -313,20 +312,18 @@ private void readFrequencyTableOrder1( } // For each symbol, normalise it's order 0 frequency table - Utils.normaliseFrequenciesOrder0Shift(frequencies[i],shift); + Utils.normaliseFrequenciesOrder0Shift(D[i].frequencies,shift); int cumulativeFreq=0; // set decoding symbols for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { - D[i].freq[j]=frequencies[i][j]; - D[i].cumulativeFreq[j]=cumulativeFreq; decodingSymbols[i][j].set( - D[i].cumulativeFreq[j], - D[i].freq[j] + cumulativeFreq, + D[i].frequencies[j] ); /* Build reverse lookup table */ - Arrays.fill(D[i].reverseLookup, cumulativeFreq, cumulativeFreq + D[i].freq[j], (byte) j); - cumulativeFreq+=frequencies[i][j]; + Arrays.fill(D[i].reverseLookup, cumulativeFreq, cumulativeFreq + D[i].frequencies[j], (byte) j); + cumulativeFreq+=D[i].frequencies[j]; } } } @@ -360,7 +357,10 @@ private ByteBuffer decodeRLEMeta( final int[] rleSymbols, final RANSNx16Params ransNx16Params) { final ByteBuffer uncompressedRLEMetaData; - if ((uncompressedRLEMetaDataLength & 0x01)!=0) { + + // The bottom bit of uncompressedRLEMetaDataLength is a flag to indicate + // whether rle metadata is uncompressed (1) or com- pressed (0). + if ((uncompressedRLEMetaDataLength & RLE_META_OPTIONALLY_COMPRESSED_MASK)!=0) { final byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; inBuffer.get(uncompressedRLEMetaDataArray, 0, (uncompressedRLEMetaDataLength-1)/2); uncompressedRLEMetaData = ByteBuffer.wrap(uncompressedRLEMetaDataArray); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 9ddc6373a2..c07ef89e39 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -85,10 +85,9 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // set order flag to "0" in the first byte of the outBuffer outBuffer.put(0,(byte)(outBuffer.get(0) & ~RANSNx16Params.ORDER_FLAG_MASK)); if (inputBuffer.remaining() == 0){ - outBuffer.limit(outBuffer.position()); //TODO: check if this is correct + outBuffer.limit(outBuffer.position()); outBuffer.rewind(); return outBuffer; - } compressOrder0WayN(inputBuffer, new RANSNx16Params(outBuffer.get(0)), outBuffer); return outBuffer; @@ -113,8 +112,6 @@ private void compressOrder0WayN ( initializeRANSEncoder(); final int inSize = inBuffer.remaining(); int bitSize = (int) Math.ceil(Math.log(inSize) / Math.log(2)); - - // TODO: Can bitSize be 0 and should we handle it? if (bitSize > Constants.TOTAL_FREQ_SHIFT) { bitSize = Constants.TOTAL_FREQ_SHIFT; } @@ -199,7 +196,6 @@ private void compressOrder1WayN ( Utils.normaliseFrequenciesOrder1(frequencies, Constants.TOTAL_FREQ_SHIFT); final int prefix_size = outBuffer.position(); - // TODO: How is the buffer size calculated? js: 257*257*3+9 ByteBuffer frequencyTable = allocateOutputBuffer(1); final ByteBuffer compressedFrequencyTable = allocateOutputBuffer(1); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 559ba77ffe..ae607176be 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -122,21 +122,21 @@ public Object[][] getRansNx16Codecs() { return testCases.toArray(new Object[][]{}); } - public Object[][] getRansNx16DecodeOnlyCodecs() { + public Object[][] getRansNx16Encoder() { - // params: RANS encoder, RANS decoder, RANS params + // params: RANS encoder, RANS params return new Object[][]{ - {new RANSNx16Encode(), new RANSNx16Decode(), new RANSNx16Params(RANSNx16Params.STRIPE_FLAG_MASK)}, - {new RANSNx16Encode(), new RANSNx16Decode(), new RANSNx16Params(RANSNx16Params.ORDER_FLAG_MASK|RANSNx16Params.STRIPE_FLAG_MASK)} + {new RANSNx16Encode(), new RANSNx16Params(RANSNx16Params.STRIPE_FLAG_MASK)}, + {new RANSNx16Encode(), new RANSNx16Params(RANSNx16Params.ORDER_FLAG_MASK|RANSNx16Params.STRIPE_FLAG_MASK)} }; } - @DataProvider(name="RansNx16DecodeOnlyAndData") - public Object[][] getRansNx16DecodeOnlyAndData() { + @DataProvider(name="RansNx16RejectEncodeStripe") + public Object[][] getRansNx16RejectEncodeStripe() { // params: RANS encoder, RANS decoder, RANS params, test data // this data provider provides all the non-empty testdata input for RANS Nx16 codec - return TestNGUtils.cartesianProduct(getRansNx16DecodeOnlyCodecs(), getRansTestData()); + return TestNGUtils.cartesianProduct(getRansNx16Encoder(), getRansTestData()); } public Object[][] getAllRansCodecs() { @@ -241,12 +241,11 @@ public void testRoundTrip( } @Test( - dataProvider = "RansNx16DecodeOnlyAndData", + dataProvider = "RansNx16RejectEncodeStripe", expectedExceptions = { CRAMException.class }, expectedExceptionsMessageRegExp = "RANSNx16 Encoding with Stripe Flag is not implemented.") public void testRansNx16RejectEncodeStripe( final RANSNx16Encode ransEncode, - final RANSNx16Decode unused, final RANSNx16Params params, final TestDataEnvelope td) { From b095b1cf8e9af7f0b38d62412e5a1be2409c32f1 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 20 Dec 2023 12:02:23 -0500 Subject: [PATCH 61/76] Addressing the feedback from Nov 7 and Nov 20 - part 6 --- .../rans/rans4x8/RANS4x8Decode.java | 28 ++++++----- .../rans/ransnx16/RANSNx16Decode.java | 50 +++++++++---------- .../rans/ransnx16/RANSNx16Encode.java | 2 +- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 67b03cdb5d..1d81e3ff02 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -193,30 +193,32 @@ private void readStatsOrder0(final ByteBuffer cp) { final RANSDecodingSymbol[] decodingSymbols = getDecodingSymbols()[0]; int rle = 0; int cumulativeFrequency = 0; - int j = cp.get() & 0xFF; + int symbol = cp.get() & 0xFF; do { - if ((decoder.frequencies[j] = (cp.get() & 0xFF)) >= 0x80) { - decoder.frequencies[j] &= ~0x80; - decoder.frequencies[j] = ((decoder.frequencies[j] & 0x7F) << 8) | (cp.get() & 0xFF); + if ((decoder.frequencies[symbol] = (cp.get() & 0xFF)) >= 0x80) { + + // read a variable sized unsigned integer with ITF8 encoding + decoder.frequencies[symbol] &= ~0x80; + decoder.frequencies[symbol] = ((decoder.frequencies[symbol] & 0x7F) << 8) | (cp.get() & 0xFF); } - decodingSymbols[j].set(cumulativeFrequency, decoder.frequencies[j]); + decodingSymbols[symbol].set(cumulativeFrequency, decoder.frequencies[symbol]); /* Build reverse lookup table */ - Arrays.fill(decoder.reverseLookup, cumulativeFrequency, cumulativeFrequency + decoder.frequencies[j], (byte) j); + Arrays.fill(decoder.reverseLookup, cumulativeFrequency, cumulativeFrequency + decoder.frequencies[symbol], (byte) symbol); - cumulativeFrequency += decoder.frequencies[j]; + cumulativeFrequency += decoder.frequencies[symbol]; - if (rle == 0 && j + 1 == (0xFF & cp.get(cp.position()))) { - j = cp.get() & 0xFF; + if (rle == 0 && symbol + 1 == (0xFF & cp.get(cp.position()))) { + symbol = cp.get() & 0xFF; rle = cp.get() & 0xFF; } else if (rle != 0) { rle--; - j++; + symbol++; } else { - j = cp.get() & 0xFF; + symbol = cp.get() & 0xFF; } - } while (j != 0); + } while (symbol != 0); assert (cumulativeFrequency <= Constants.TOTAL_FREQ); } @@ -232,6 +234,8 @@ private void readStatsOrder1(final ByteBuffer cp) { int j = 0xFF & cp.get(); do { if ((D[i].frequencies[j] = (0xFF & cp.get())) >= 0x80) { + + // read a variable sized unsigned integer with ITF8 encoding D[i].frequencies[j] &= ~0x80; D[i].frequencies[j] = ((D[i].frequencies[j] & 0x7F) << 8) | (0xFF & cp.get()); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index ce30da803f..c0cc8d97ae 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -46,16 +46,16 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // if pack, get pack metadata, which will be used later to decode packed data int packDataLength = 0; int numSymbols = 0; - int[] packMappingTable = null; + byte[] packMappingTable = null; if (ransNx16Params.isPack()) { packDataLength = uncompressedSize; numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0), raise exception if (numSymbols <= 16 && numSymbols != 0) { - packMappingTable = new int[numSymbols]; + packMappingTable = new byte[numSymbols]; for (int i = 0; i < numSymbols; i++) { - packMappingTable[i] = inBuffer.get() & 0xFF; + packMappingTable[i] = inBuffer.get(); } uncompressedSize = Utils.readUint7(inBuffer); } else { @@ -77,13 +77,20 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { } ByteBuffer outBuffer; + // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { - final byte[] data = new byte[uncompressedSize]; - inBuffer.get(data, 0, uncompressedSize); - outBuffer = ByteBuffer.wrap(data); + outBuffer = inBuffer.slice(); + + // While resetting the position to the end is not strictly necessary, + // it is being done for the sake of completeness and + // to meet the requirements of the tests that verify the boundary conditions. + inBuffer.position(inBuffer.limit()); } else { outBuffer = ByteBuffer.allocate(uncompressedSize); + + // uncompressedSize is 0 in cases where Pack flag is used + // and number of distinct symbols in the raw data is 1 if (uncompressedSize != 0) { switch (ransNx16Params.getOrder()) { case ZERO: @@ -126,8 +133,6 @@ private void uncompressOrder0WayN( // Nway parallel rans states. Nway = 4 or 32 final long[] rans = new long[Nway]; - // symbols is the array of decoded symbols - final byte[] symbols = new byte[Nway]; for (int r=0; r 0) { - if ((decoder.frequencies[j] = (cp.get() & 0xFF)) >= 0x80){ - decoder.frequencies[j] &= ~0x80; - decoder.frequencies[j] = (( decoder.frequencies[j] &0x7f) << 7) | (cp.get() & 0x7F); - } + decoder.frequencies[j] = Utils.readUint7(cp); } } Utils.normaliseFrequenciesOrder0Shift(decoder.frequencies, Constants.TOTAL_FREQ_SHIFT); @@ -294,7 +296,6 @@ private void readFrequencyTableOrder1( final ArithmeticDecoder[] D = getD(); final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols(); final int[] alphabet = readAlphabet(cp); - for (int i=0; i < Constants.NUMBER_OF_SYMBOLS; i++) { if (alphabet[i] > 0) { int run = 0; @@ -305,7 +306,7 @@ private void readFrequencyTableOrder1( } else { D[i].frequencies[j] = Utils.readUint7(cp); if (D[i].frequencies[j] == 0){ - run = Utils.readUint7(cp); + run = cp.get() & 0xFF; } } } @@ -412,14 +413,14 @@ private ByteBuffer decodeRLE( private ByteBuffer decodePack( final ByteBuffer inBuffer, - final int[] packMappingTable, + final byte[] packMappingTable, final int numSymbols, final int uncompressedPackOutputLength) { final ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); int j = 0; if (numSymbols <= 1) { for (int i=0; i < uncompressedPackOutputLength; i++){ - outBufferPack.put(i, (byte) packMappingTable[0]); + outBufferPack.put(i, packMappingTable[0]); } } @@ -430,7 +431,7 @@ else if (numSymbols <= 2) { if (i % 8 == 0){ v = inBuffer.get(j++); } - outBufferPack.put(i, (byte) packMappingTable[v & 1]); + outBufferPack.put(i, packMappingTable[v & 1]); v >>=1; } } @@ -442,7 +443,7 @@ else if (numSymbols <= 4){ if (i % 4 == 0){ v = inBuffer.get(j++); } - outBufferPack.put(i, (byte) packMappingTable[v & 3]); + outBufferPack.put(i, packMappingTable[v & 3]); v >>=2; } } @@ -454,7 +455,7 @@ else if (numSymbols <= 16){ if (i % 2 == 0){ v = inBuffer.get(j++); } - outBufferPack.put(i, (byte) packMappingTable[v & 15]); + outBufferPack.put(i, packMappingTable[v & 15]); v >>=4; } } @@ -464,10 +465,9 @@ else if (numSymbols <= 16){ private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ final int numInterleaveStreams = inBuffer.get() & 0xFF; - // retrieve lengths of compressed interleaved streams - final int[] compressedLengths = new int[numInterleaveStreams]; + // read lengths of compressed interleaved streams for ( int j=0; j Date: Thu, 21 Dec 2023 15:42:42 -0500 Subject: [PATCH 62/76] Addressing the feedback from Nov 7 and Nov 20 - part 6 --- .../cram/compression/rans/RANSEncode.java | 16 --------- .../samtools/cram/compression/rans/Utils.java | 31 +++++++++++++++++ .../rans/rans4x8/RANS4x8Decode.java | 4 +-- .../rans/rans4x8/RANS4x8Encode.java | 16 ++++----- .../rans/ransnx16/RANSNx16Decode.java | 24 +++++++------- .../rans/ransnx16/RANSNx16Encode.java | 33 ++++++++++--------- .../htsjdk/samtools/cram/RANSInteropTest.java | 7 ++-- .../cram/compression/rans/RansTest.java | 12 +++---- 8 files changed, 79 insertions(+), 64 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index 02c923e631..49b12dd275 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -1,8 +1,6 @@ package htsjdk.samtools.cram.compression.rans; -import htsjdk.samtools.cram.CRAMException; import java.nio.ByteBuffer; -import java.nio.ByteOrder; public abstract class RANSEncode { private RANSEncodingSymbol[][] encodingSymbols; @@ -35,20 +33,6 @@ protected void initializeRANSEncoder() { } } - protected ByteBuffer allocateOutputBuffer(final int inSize) { - // This calculation is identical to the one in samtools rANS_static.c - // Presumably the frequency table (always big enough for order 1) = 257*257, - // then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency), - // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). - final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); - final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize); - if (outputBuffer.remaining() < compressedSize) { - throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); - } - outputBuffer.order(ByteOrder.LITTLE_ENDIAN); - return outputBuffer; - } - protected void buildSymsOrder0(final int[] frequencies) { updateEncodingSymbols(frequencies, getEncodingSymbols()[0]); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index 35f3c4dec3..de7272eb53 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -1,6 +1,8 @@ package htsjdk.samtools.cram.compression.rans; +import htsjdk.samtools.cram.CRAMException; import java.nio.ByteBuffer; +import java.nio.ByteOrder; final public class Utils { @@ -191,4 +193,33 @@ public static void normaliseFrequenciesOrder1Shift(final int[][] F, final int sh } } } + + public static ByteBuffer allocateOutputBuffer(final int inSize) { + // This calculation is identical to the one in samtools rANS_static.c + // Presumably the frequency table (always big enough for order 1) = 257*257, + // then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency), + // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). + final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); + final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN); + if (outputBuffer.remaining() < compressedSize) { + throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); + } + return outputBuffer; + } + + // returns a new LITTLE_ENDIAN ByteBuffer of size = bufferSize + public static ByteBuffer allocateByteBuffer(final int bufferSize){ + return ByteBuffer.allocate(bufferSize).order(ByteOrder.LITTLE_ENDIAN); + } + + // returns a LITTLE_ENDIAN ByteBuffer that is created by wrapping a byte[] + public static ByteBuffer wrap(final byte[] inputBytes){ + return ByteBuffer.wrap(inputBytes).order(ByteOrder.LITTLE_ENDIAN); + } + + // returns a LITTLE_ENDIAN ByteBuffer that is created by inputBuffer.slice() + public static ByteBuffer slice(final ByteBuffer inputBuffer){ + return inputBuffer.slice().order(ByteOrder.LITTLE_ENDIAN); + } + } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 1d81e3ff02..6efcc31868 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -15,7 +15,7 @@ public class RANS4x8Decode extends RANSDecode { private static final int RAW_BYTE_LENGTH = 4; - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It uncompresses the data in the inBuffer, leaving it consumed. @@ -39,7 +39,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // uncompressed bytes length final int outSize = inBuffer.getInt(); - final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + final ByteBuffer outBuffer = Utils.allocateByteBuffer(outSize); initializeRANSDecoder(); switch (order) { case ZERO: diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index d8726a47dc..8ac4b618b3 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -15,7 +15,7 @@ public class RANS4x8Encode extends RANSEncode { // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, // so always use ORDER-0 private static final int MINIMUM_ORDER_1_SIZE = 4; - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It compresses the data in the inBuffer, leaving it consumed. @@ -44,7 +44,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final int inputSize = inBuffer.remaining(); - final ByteBuffer outBuffer = allocateOutputBuffer(inputSize); + final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inputSize); // move the output buffer ahead to the start of the frequency table (we'll come back and // write the output stream prefix at the end of this method) @@ -55,7 +55,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { // using the normalised frequencies, set the RANSEncodingSymbols buildSymsOrder0(normalizedFreq); - final ByteBuffer cp = outBuffer.slice(); + final ByteBuffer cp = Utils.slice(outBuffer); // write Frequency table final int frequencyTableSize = writeFrequenciesOrder0(cp, normalizedFreq); @@ -65,7 +65,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; final int in_size = inBuffer.remaining(); long rans0, rans1, rans2, rans3; - final ByteBuffer ptr = cp.slice(); + final ByteBuffer ptr = Utils.slice(cp); rans0 = Constants.RANS_4x8_LOWER_BOUND; rans1 = Constants.RANS_4x8_LOWER_BOUND; rans2 = Constants.RANS_4x8_LOWER_BOUND; @@ -94,6 +94,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { rans0 = syms[0xFF & c0].putSymbol4x8(rans0, ptr); } + ptr.order(ByteOrder.BIG_ENDIAN); ptr.putInt((int) rans3); ptr.putInt((int) rans2); ptr.putInt((int) rans1); @@ -111,7 +112,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final int inSize = inBuffer.remaining(); - final ByteBuffer outBuffer = allocateOutputBuffer(inSize); + final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inSize); // move to start of frequency outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); @@ -122,7 +123,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { // using the normalised frequencies, set the RANSEncodingSymbols buildSymsOrder1(normalizedFreq); - final ByteBuffer cp = outBuffer.slice(); + final ByteBuffer cp = Utils.slice(outBuffer); final int frequencyTableSize = writeFrequenciesOrder1(cp, normalizedFreq); inBuffer.rewind(); final int in_size = inBuffer.remaining(); @@ -155,7 +156,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { byte l3 = inBuffer.get(in_size - 1); // Slicing is needed for buffer reversing later - final ByteBuffer ptr = cp.slice(); + final ByteBuffer ptr = Utils.slice(cp); final RANSEncodingSymbol[][] syms = getEncodingSymbols(); for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { final byte c3 = inBuffer.get(i3); @@ -216,7 +217,6 @@ private static void writeCompressionPrefix( // go back to the beginning of the stream and write the prefix values // write the (ORDER as a single byte at offset 0) outBuffer.put(0, (byte) (order == RANSParams.ORDER.ZERO ? 0 : 1)); - outBuffer.order(ByteOrder.LITTLE_ENDIAN); // move past the ORDER and write the compressed size outBuffer.putInt(Constants.RANS_4x8_ORDER_BYTE_LENGTH, frequencyTableSize + compressedBlobSize); // move past the compressed size and write the uncompressed size diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index c0cc8d97ae..abd4ac85da 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -12,7 +12,7 @@ import java.util.Arrays; public class RANSNx16Decode extends RANSDecode { - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; private static final int RLE_META_OPTIONALLY_COMPRESSED_MASK = 0x01; @@ -80,14 +80,14 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { - outBuffer = inBuffer.slice(); + outBuffer = Utils.slice(inBuffer); // While resetting the position to the end is not strictly necessary, // it is being done for the sake of completeness and // to meet the requirements of the tests that verify the boundary conditions. inBuffer.position(inBuffer.limit()); } else { - outBuffer = ByteBuffer.allocate(uncompressedSize); + outBuffer = Utils.allocateByteBuffer(uncompressedSize); // uncompressedSize is 0 in cases where Pack flag is used // and number of distinct symbols in the raw data is 1 @@ -194,9 +194,8 @@ private void uncompressOrder1WayN( inBuffer.get(compressedFreqTable,0,compressedLength); // decode the compressedFreqTable to get the uncompressedFreqTable using RANS Nx16, N=4 Order 0 uncompress - freqTableSource = ByteBuffer.allocate(uncompressedLength); - final ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); - compressedFrequencyTableBuffer.order(ByteOrder.LITTLE_ENDIAN); + freqTableSource = Utils.allocateByteBuffer(uncompressedLength); + final ByteBuffer compressedFrequencyTableBuffer = Utils.wrap(compressedFreqTable); // uncompress using RANSNx16 Order 0, Nway = 4 // formatFlags = (~RANSNx16Params.ORDER_FLAG_MASK & ~RANSNx16Params.N32_FLAG_MASK) = ~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK) @@ -364,14 +363,13 @@ private ByteBuffer decodeRLEMeta( if ((uncompressedRLEMetaDataLength & RLE_META_OPTIONALLY_COMPRESSED_MASK)!=0) { final byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; inBuffer.get(uncompressedRLEMetaDataArray, 0, (uncompressedRLEMetaDataLength-1)/2); - uncompressedRLEMetaData = ByteBuffer.wrap(uncompressedRLEMetaDataArray); + uncompressedRLEMetaData = Utils.wrap(uncompressedRLEMetaDataArray); } else { final int compressedRLEMetaDataLength = Utils.readUint7(inBuffer); final byte[] compressedRLEMetaDataArray = new byte[compressedRLEMetaDataLength]; inBuffer.get(compressedRLEMetaDataArray,0,compressedRLEMetaDataLength); - final ByteBuffer compressedRLEMetaData = ByteBuffer.wrap(compressedRLEMetaDataArray); - compressedRLEMetaData.order(ByteOrder.LITTLE_ENDIAN); - uncompressedRLEMetaData = ByteBuffer.allocate(uncompressedRLEMetaDataLength / 2); + final ByteBuffer compressedRLEMetaData = Utils.wrap(compressedRLEMetaDataArray); + uncompressedRLEMetaData = Utils.allocateByteBuffer(uncompressedRLEMetaDataLength / 2); // uncompress using Order 0 and N = Nway uncompressOrder0WayN( compressedRLEMetaData, @@ -395,7 +393,7 @@ private ByteBuffer decodeRLE( final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, final int uncompressedRLEOutputLength) { - final ByteBuffer rleOutBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength); + final ByteBuffer rleOutBuffer = Utils.allocateByteBuffer(uncompressedRLEOutputLength); int j = 0; for(int i = 0; j< uncompressedRLEOutputLength; i++){ final byte sym = inBuffer.get(i); @@ -416,7 +414,7 @@ private ByteBuffer decodePack( final byte[] packMappingTable, final int numSymbols, final int uncompressedPackOutputLength) { - final ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); + final ByteBuffer outBufferPack = Utils.allocateByteBuffer(uncompressedPackOutputLength); int j = 0; if (numSymbols <= 1) { for (int i=0; i < uncompressedPackOutputLength; i++){ @@ -483,7 +481,7 @@ private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ } // Transpose - final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + final ByteBuffer outBuffer = Utils.allocateByteBuffer(outSize); for (int j = 0; j { // Stripe flag is not implemented in the write implementation ///////////////////////////////////////////////////////////////////////////////////////////////// - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It compresses the data in the inBuffer, leaving it consumed. @@ -24,7 +24,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } - final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); + final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inBuffer.remaining()); final int formatFlags = ransNx16Params.getFormatFlags(); outBuffer.put((byte) (formatFlags)); // one byte for formatFlags @@ -117,7 +117,7 @@ private void compressOrder0WayN ( } final int prefix_size = outBuffer.position(); final int[] F = buildFrequenciesOrder0(inBuffer); - final ByteBuffer cp = outBuffer.slice(); + final ByteBuffer cp = Utils.slice(outBuffer); // Normalize Frequencies such that sum of Frequencies = 1 << bitsize Utils.normaliseFrequenciesOrder0(F, bitSize); @@ -150,7 +150,7 @@ private void compressOrder0WayN ( for (int r=0; r0){ @@ -170,6 +170,8 @@ private void compressOrder0WayN ( rans[r] = ransEncodingSymbols[0xFF & symbol[r]].putSymbolNx16(rans[r], ptr); } } + + ptr.order(ByteOrder.BIG_ENDIAN); for (int i=Nway-1; i>=0; i--){ ptr.putInt((int) rans[i]); } @@ -196,8 +198,8 @@ private void compressOrder1WayN ( Utils.normaliseFrequenciesOrder1(frequencies, Constants.TOTAL_FREQ_SHIFT); final int prefix_size = outBuffer.position(); - ByteBuffer frequencyTable = allocateOutputBuffer(1); - final ByteBuffer compressedFrequencyTable = allocateOutputBuffer(1); + ByteBuffer frequencyTable = Utils.allocateOutputBuffer(1); + final ByteBuffer compressedFrequencyTable = Utils.allocateOutputBuffer(1); // uncompressed frequency table final int uncompressedFrequencyTableSize = writeFrequenciesOrder1(frequencyTable,frequencies); @@ -214,7 +216,7 @@ private void compressOrder1WayN ( // TODO: we should work on a more permanent solution for this issue! initializeRANSEncoder(); final int compressedFrequencyTableSize = compressedFrequencyTable.limit(); - final ByteBuffer cp = outBuffer.slice(); + final ByteBuffer cp = Utils.slice(outBuffer); // spec: The order-1 frequency table itself may still be quite large, // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. @@ -281,7 +283,7 @@ private void compressOrder1WayN ( } // Slicing is needed for buffer reversing later. - final ByteBuffer ptr = cp.slice(); + final ByteBuffer ptr = Utils.slice(cp); final RANSEncodingSymbol[][] ransEncodingSymbols = getEncodingSymbols(); final byte[] context = new byte[Nway]; @@ -327,7 +329,6 @@ private void compressOrder1WayN ( inBuffer.position(inBuffer.limit()); outBuffer.rewind(); outBuffer.limit(prefix_size + frequencyTableSize + compressedBlobSize); - outBuffer.order(ByteOrder.LITTLE_ENDIAN); } private static int[] buildFrequenciesOrder0(final ByteBuffer inBuffer) { @@ -501,7 +502,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // create rleMetaData buffer to store rle metadata. // This buffer will be compressed using compressOrder0WayN towards the end of this method // TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize - final ByteBuffer rleMetaData = ByteBuffer.allocate(numRLESymbols+1+inputSize); // rleMetaData + final ByteBuffer rleMetaData = Utils.allocateByteBuffer(numRLESymbols+1+inputSize); // rleMetaData // write number of symbols that are run length encoded rleMetaData.put((byte) numRLESymbols); @@ -516,7 +517,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // Apply RLE // encodedBuffer -> input src data without repetition - final ByteBuffer encodedBuffer = ByteBuffer.allocate(inputSize); // rleInBuffer + final ByteBuffer encodedBuffer = Utils.allocateByteBuffer(inputSize); // rleInBuffer int encodedBufferIdx = 0; // rleInBufferIndex for (int i = 0; i < inputSize; i++) { @@ -544,7 +545,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff rleMetaData.rewind(); // compress the rleMetaData Buffer - final ByteBuffer compressedRleMetaData = allocateOutputBuffer(rleMetaData.remaining()); + final ByteBuffer compressedRleMetaData = Utils.allocateOutputBuffer(rleMetaData.remaining()); // compress using Order 0 and N = Nway compressOrder0WayN(rleMetaData, new RANSNx16Params(0x00 | ransNx16Params.getFormatFlags() & RANSNx16Params.N32_FLAG_MASK),compressedRleMetaData); @@ -573,12 +574,12 @@ private ByteBuffer encodePack( final int inSize = inBuffer.remaining(); final ByteBuffer encodedBuffer; if (numSymbols <= 1) { - encodedBuffer = ByteBuffer.allocate(0); + encodedBuffer = Utils.allocateByteBuffer(0); } else if (numSymbols <= 2) { // 1 bit per value final int encodedBufferSize = (int) Math.ceil((double) inSize/8); - encodedBuffer = ByteBuffer.allocate(encodedBufferSize); + encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 8 == 0) { @@ -590,7 +591,7 @@ private ByteBuffer encodePack( // 2 bits per value final int encodedBufferSize = (int) Math.ceil((double) inSize/4); - encodedBuffer = ByteBuffer.allocate(encodedBufferSize); + encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 4 == 0) { @@ -602,7 +603,7 @@ private ByteBuffer encodePack( // 4 bits per value final int encodedBufferSize = (int) Math.ceil((double)inSize/2); - encodedBuffer = ByteBuffer.allocate(encodedBufferSize); + encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 2 == 0) { diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index 1616f28c7f..c8379e8f63 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -4,6 +4,7 @@ import htsjdk.samtools.cram.compression.rans.RANSDecode; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.Utils; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; @@ -176,7 +177,7 @@ public void testRANSRoundTrip( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = Utils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); // Stripe Flag is not implemented in RANSNx16 Encoder. // The encoder throws CRAMException if Stripe Flag is used. @@ -207,8 +208,8 @@ public void testDecodeOnly( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); + final ByteBuffer uncompressedInteropBytes = Utils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer preCompressedInteropBytes = Utils.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo final ByteBuffer uncompressedHtsjdkBytes = ransDecode.uncompress(preCompressedInteropBytes); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index ae607176be..4e8a1e6bd2 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -174,7 +174,7 @@ public void testRoundTripTinySmallLarge( final TestDataEnvelope td, final Integer lowerLimit, final Integer upperLimit){ - final ByteBuffer in = ByteBuffer.wrap(td.testArray); + final ByteBuffer in = Utils.wrap(td.testArray); for (int rawSize = lowerLimit; rawSize < upperLimit; rawSize++) { in.position(0); in.limit(rawSize); @@ -188,7 +188,7 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final RANS4x8Decode ransDecode, final RANS4x8Params params) { final int rawSize = 1001; - final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer rawData = Utils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // minimum prefix len when input is not Empty Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); @@ -202,7 +202,7 @@ public void testRansNx16BuffersMeetBoundaryExpectations( final RANSNx16Decode ransDecode, final RANSNx16Params params) { final int rawSize = 1001; - final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer rawData = Utils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode,ransDecode,params); rawData.rewind(); Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty @@ -237,7 +237,7 @@ public void testRoundTrip( final RANSDecode ransDecode, final RANSParams params, final TestDataEnvelope td) { - ransRoundTrip(ransEncode, ransDecode, params, ByteBuffer.wrap(td.testArray)); + ransRoundTrip(ransEncode, ransDecode, params, Utils.wrap(td.testArray)); } @Test( @@ -251,7 +251,7 @@ public void testRansNx16RejectEncodeStripe( // When td is not Empty, Encoding with Stripe Flag should throw an Exception // as Encode Stripe is not implemented - ransEncode.compress(ByteBuffer.wrap(td.testArray), params); + ransEncode.compress(Utils.wrap(td.testArray), params); } @Test( @@ -261,7 +261,7 @@ public void testRansNx16RejectEncodeStripe( expectedExceptionsMessageRegExp = "Bit Packing is not permitted when number " + "of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: 0") public void testRANSNx16RejectDecodePack(){ - final ByteBuffer compressedData = ByteBuffer.wrap(new byte[]{(byte) RANSNx16Params.PACK_FLAG_MASK, (byte) 0x00, (byte) 0x00}); + final ByteBuffer compressedData = Utils.wrap(new byte[]{(byte) RANSNx16Params.PACK_FLAG_MASK, (byte) 0x00, (byte) 0x00}); final RANSNx16Decode ransDecode = new RANSNx16Decode(); ransDecode.uncompress(compressedData); } From 52549f543e2c0f0e895c7e31b5c2e9810cc7f92e Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 10 Jan 2024 14:22:06 -0500 Subject: [PATCH 63/76] Move common code to CompressionUtils --- .../cram/compression/CompressionUtils.java | 179 ++++++++++++++++++ .../samtools/cram/compression/rans/Utils.java | 56 ------ .../rans/rans4x8/RANS4x8Decode.java | 5 +- .../rans/rans4x8/RANS4x8Encode.java | 15 +- .../rans/ransnx16/RANSNx16Decode.java | 97 +++------- .../rans/ransnx16/RANSNx16Encode.java | 104 ++-------- .../htsjdk/samtools/cram/RANSInteropTest.java | 8 +- .../cram/compression/rans/RansTest.java | 15 +- 8 files changed, 246 insertions(+), 233 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java new file mode 100644 index 0000000000..6d9a725696 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java @@ -0,0 +1,179 @@ +package htsjdk.samtools.cram.compression; + +import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.rans.Constants; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class CompressionUtils { + public static void writeUint7(final int i, final ByteBuffer cp) { + int s = 0; + int X = i; + do { + s += 7; + X >>= 7; + } while (X > 0); + do { + s -= 7; + //writeByte + final int s_ = (s > 0) ? 1 : 0; + cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); + } while (s > 0); + } + + public static int readUint7(final ByteBuffer cp) { + int i = 0; + int c; + do { + //read byte + c = cp.get(); + i = (i << 7) | (c & 0x7f); + } while ((c & 0x80) != 0); + return i; + } + + public static ByteBuffer encodePack( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int[] frequencyTable, + final int[] packMappingTable, + final int numSymbols){ + final int inSize = inBuffer.remaining(); + final ByteBuffer encodedBuffer; + if (numSymbols <= 1) { + encodedBuffer = CompressionUtils.allocateByteBuffer(0); + } else if (numSymbols <= 2) { + + // 1 bit per value + final int encodedBufferSize = (int) Math.ceil((double) inSize/8); + encodedBuffer = CompressionUtils.allocateByteBuffer(encodedBufferSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 8 == 0) { + encodedBuffer.put(++j, (byte) 0); + } + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); + } + } else if (numSymbols <= 4) { + + // 2 bits per value + final int encodedBufferSize = (int) Math.ceil((double) inSize/4); + encodedBuffer = CompressionUtils.allocateByteBuffer(encodedBufferSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 4 == 0) { + encodedBuffer.put(++j, (byte) 0); + } + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); + } + } else { + + // 4 bits per value + final int encodedBufferSize = (int) Math.ceil((double)inSize/2); + encodedBuffer = CompressionUtils.allocateByteBuffer(encodedBufferSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 2 == 0) { + encodedBuffer.put(++j, (byte) 0); + } + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); + } + } + + // write numSymbols + outBuffer.put((byte) numSymbols); + + // write mapping table "packMappingTable" that converts mapped value to original symbol + for(int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i ++) { + if (frequencyTable[i] > 0) { + outBuffer.put((byte) i); + } + } + + // write the length of data + CompressionUtils.writeUint7(encodedBuffer.limit(), outBuffer); + return encodedBuffer; // Here position = 0 since we have always accessed the data buffer using index + } + + public static ByteBuffer decodePack( + final ByteBuffer inBuffer, + final byte[] packMappingTable, + final int numSymbols, + final int uncompressedPackOutputLength) { + final ByteBuffer outBufferPack = CompressionUtils.allocateByteBuffer(uncompressedPackOutputLength); + int j = 0; + if (numSymbols <= 1) { + for (int i=0; i < uncompressedPackOutputLength; i++){ + outBufferPack.put(i, packMappingTable[0]); + } + } + + // 1 bit per value + else if (numSymbols <= 2) { + int v = 0; + for (int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 8 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 1]); + v >>=1; + } + } + + // 2 bits per value + else if (numSymbols <= 4){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 4 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 3]); + v >>=2; + } + } + + // 4 bits per value + else if (numSymbols <= 16){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 2 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 15]); + v >>=4; + } + } + return outBufferPack; + } + + + + public static ByteBuffer allocateOutputBuffer(final int inSize) { + // This calculation is identical to the one in samtools rANS_static.c + // Presumably the frequency table (always big enough for order 1) = 257*257, + // then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency), + // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). + final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); + final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN); + if (outputBuffer.remaining() < compressedSize) { + throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); + } + return outputBuffer; + } + + // returns a new LITTLE_ENDIAN ByteBuffer of size = bufferSize + public static ByteBuffer allocateByteBuffer(final int bufferSize){ + return ByteBuffer.allocate(bufferSize).order(ByteOrder.LITTLE_ENDIAN); + } + + // returns a LITTLE_ENDIAN ByteBuffer that is created by wrapping a byte[] + public static ByteBuffer wrap(final byte[] inputBytes){ + return ByteBuffer.wrap(inputBytes).order(ByteOrder.LITTLE_ENDIAN); + } + + // returns a LITTLE_ENDIAN ByteBuffer that is created by inputBuffer.slice() + public static ByteBuffer slice(final ByteBuffer inputBuffer){ + return inputBuffer.slice().order(ByteOrder.LITTLE_ENDIAN); + } +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index de7272eb53..06abbca89d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -1,8 +1,6 @@ package htsjdk.samtools.cram.compression.rans; -import htsjdk.samtools.cram.CRAMException; import java.nio.ByteBuffer; -import java.nio.ByteOrder; final public class Utils { @@ -54,32 +52,6 @@ public static long RANSDecodeRenormalizeNx16(final long r, final ByteBuffer byte return ret; } - public static void writeUint7(final int i, final ByteBuffer cp) { - int s = 0; - int X = i; - do { - s += 7; - X >>= 7; - } while (X > 0); - do { - s -= 7; - //writeByte - final int s_ = (s > 0) ? 1 : 0; - cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); - } while (s > 0); - } - - public static int readUint7(final ByteBuffer cp) { - int i = 0; - int c; - do { - //read byte - c = cp.get(); - i = (i << 7) | (c & 0x7f); - } while ((c & 0x80) != 0); - return i; - } - public static void normaliseFrequenciesOrder0(final int[] F, final int bits) { // Returns an array of normalised Frequencies, // such that the frequencies add up to 1<symbol, 2 bytes -> scaled frequency), - // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). - final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); - final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN); - if (outputBuffer.remaining() < compressedSize) { - throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); - } - return outputBuffer; - } - - // returns a new LITTLE_ENDIAN ByteBuffer of size = bufferSize - public static ByteBuffer allocateByteBuffer(final int bufferSize){ - return ByteBuffer.allocate(bufferSize).order(ByteOrder.LITTLE_ENDIAN); - } - - // returns a LITTLE_ENDIAN ByteBuffer that is created by wrapping a byte[] - public static ByteBuffer wrap(final byte[] inputBytes){ - return ByteBuffer.wrap(inputBytes).order(ByteOrder.LITTLE_ENDIAN); - } - - // returns a LITTLE_ENDIAN ByteBuffer that is created by inputBuffer.slice() - public static ByteBuffer slice(final ByteBuffer inputBuffer){ - return inputBuffer.slice().order(ByteOrder.LITTLE_ENDIAN); - } - } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 6efcc31868..25b9b773e9 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; @@ -15,7 +16,7 @@ public class RANS4x8Decode extends RANSDecode { private static final int RAW_BYTE_LENGTH = 4; - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It uncompresses the data in the inBuffer, leaving it consumed. @@ -39,7 +40,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // uncompressed bytes length final int outSize = inBuffer.getInt(); - final ByteBuffer outBuffer = Utils.allocateByteBuffer(outSize); + final ByteBuffer outBuffer = CompressionUtils.allocateByteBuffer(outSize); initializeRANSDecoder(); switch (order) { case ZERO: diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 8ac4b618b3..638882fb67 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; @@ -15,7 +16,7 @@ public class RANS4x8Encode extends RANSEncode { // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, // so always use ORDER-0 private static final int MINIMUM_ORDER_1_SIZE = 4; - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It compresses the data in the inBuffer, leaving it consumed. @@ -44,7 +45,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final int inputSize = inBuffer.remaining(); - final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inputSize); + final ByteBuffer outBuffer = CompressionUtils.allocateOutputBuffer(inputSize); // move the output buffer ahead to the start of the frequency table (we'll come back and // write the output stream prefix at the end of this method) @@ -55,7 +56,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { // using the normalised frequencies, set the RANSEncodingSymbols buildSymsOrder0(normalizedFreq); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); // write Frequency table final int frequencyTableSize = writeFrequenciesOrder0(cp, normalizedFreq); @@ -65,7 +66,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; final int in_size = inBuffer.remaining(); long rans0, rans1, rans2, rans3; - final ByteBuffer ptr = Utils.slice(cp); + final ByteBuffer ptr = CompressionUtils.slice(cp); rans0 = Constants.RANS_4x8_LOWER_BOUND; rans1 = Constants.RANS_4x8_LOWER_BOUND; rans2 = Constants.RANS_4x8_LOWER_BOUND; @@ -112,7 +113,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final int inSize = inBuffer.remaining(); - final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inSize); + final ByteBuffer outBuffer = CompressionUtils.allocateOutputBuffer(inSize); // move to start of frequency outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); @@ -123,7 +124,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { // using the normalised frequencies, set the RANSEncodingSymbols buildSymsOrder1(normalizedFreq); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); final int frequencyTableSize = writeFrequenciesOrder1(cp, normalizedFreq); inBuffer.rewind(); final int in_size = inBuffer.remaining(); @@ -156,7 +157,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { byte l3 = inBuffer.get(in_size - 1); // Slicing is needed for buffer reversing later - final ByteBuffer ptr = Utils.slice(cp); + final ByteBuffer ptr = CompressionUtils.slice(cp); final RANSEncodingSymbol[][] syms = getEncodingSymbols(); for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { final byte c3 = inBuffer.get(i3); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index abd4ac85da..dcb81c8d5f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans.ransnx16; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; @@ -12,7 +13,7 @@ import java.util.Arrays; public class RANSNx16Decode extends RANSDecode { - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; private static final int RLE_META_OPTIONALLY_COMPRESSED_MASK = 0x01; @@ -36,7 +37,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); // if nosz flag is set, then uncompressed size is not recorded. - int uncompressedSize = ransNx16Params.isNosz() ? outSize : Utils.readUint7(inBuffer); + int uncompressedSize = ransNx16Params.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer); // if stripe, then decodeStripe if (ransNx16Params.isStripe()) { @@ -57,7 +58,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get(); } - uncompressedSize = Utils.readUint7(inBuffer); + uncompressedSize = CompressionUtils.readUint7(inBuffer); } else { throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. " + "Number of distinct symbols: " + numSymbols); @@ -70,9 +71,9 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { ByteBuffer uncompressedRLEMetaData = null; if (ransNx16Params.isRLE()) { rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; - final int uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); + final int uncompressedRLEMetaDataLength = CompressionUtils.readUint7(inBuffer); uncompressedRLEOutputLength = uncompressedSize; - uncompressedSize = Utils.readUint7(inBuffer); + uncompressedSize = CompressionUtils.readUint7(inBuffer); uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols, ransNx16Params); } @@ -80,14 +81,14 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { - outBuffer = Utils.slice(inBuffer); + outBuffer = CompressionUtils.slice(inBuffer); // While resetting the position to the end is not strictly necessary, // it is being done for the sake of completeness and // to meet the requirements of the tests that verify the boundary conditions. inBuffer.position(inBuffer.limit()); } else { - outBuffer = Utils.allocateByteBuffer(uncompressedSize); + outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); // uncompressedSize is 0 in cases where Pack flag is used // and number of distinct symbols in the raw data is 1 @@ -112,7 +113,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // if pack, then decodePack if (ransNx16Params.isPack()) { - outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); + outBuffer = CompressionUtils.decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } return outBuffer; } @@ -186,16 +187,16 @@ private void uncompressOrder1WayN( // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. // if optionalCompressFlag is true, the frequency table was compressed using RANS Nx16, N=4 Order 0 - final int uncompressedLength = Utils.readUint7(inBuffer); - final int compressedLength = Utils.readUint7(inBuffer); + final int uncompressedLength = CompressionUtils.readUint7(inBuffer); + final int compressedLength = CompressionUtils.readUint7(inBuffer); byte[] compressedFreqTable = new byte[compressedLength]; // read compressedLength bytes into compressedFreqTable byte array inBuffer.get(compressedFreqTable,0,compressedLength); // decode the compressedFreqTable to get the uncompressedFreqTable using RANS Nx16, N=4 Order 0 uncompress - freqTableSource = Utils.allocateByteBuffer(uncompressedLength); - final ByteBuffer compressedFrequencyTableBuffer = Utils.wrap(compressedFreqTable); + freqTableSource = CompressionUtils.allocateByteBuffer(uncompressedLength); + final ByteBuffer compressedFrequencyTableBuffer = CompressionUtils.wrap(compressedFreqTable); // uncompress using RANSNx16 Order 0, Nway = 4 // formatFlags = (~RANSNx16Params.ORDER_FLAG_MASK & ~RANSNx16Params.N32_FLAG_MASK) = ~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK) @@ -269,7 +270,7 @@ private void readFrequencyTableOrder0( // read frequencies, normalise frequencies for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (alphabet[j] > 0) { - decoder.frequencies[j] = Utils.readUint7(cp); + decoder.frequencies[j] = CompressionUtils.readUint7(cp); } } Utils.normaliseFrequenciesOrder0Shift(decoder.frequencies, Constants.TOTAL_FREQ_SHIFT); @@ -303,7 +304,7 @@ private void readFrequencyTableOrder1( if (run > 0) { run--; } else { - D[i].frequencies[j] = Utils.readUint7(cp); + D[i].frequencies[j] = CompressionUtils.readUint7(cp); if (D[i].frequencies[j] == 0){ run = cp.get() & 0xFF; } @@ -363,13 +364,13 @@ private ByteBuffer decodeRLEMeta( if ((uncompressedRLEMetaDataLength & RLE_META_OPTIONALLY_COMPRESSED_MASK)!=0) { final byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; inBuffer.get(uncompressedRLEMetaDataArray, 0, (uncompressedRLEMetaDataLength-1)/2); - uncompressedRLEMetaData = Utils.wrap(uncompressedRLEMetaDataArray); + uncompressedRLEMetaData = CompressionUtils.wrap(uncompressedRLEMetaDataArray); } else { - final int compressedRLEMetaDataLength = Utils.readUint7(inBuffer); + final int compressedRLEMetaDataLength = CompressionUtils.readUint7(inBuffer); final byte[] compressedRLEMetaDataArray = new byte[compressedRLEMetaDataLength]; inBuffer.get(compressedRLEMetaDataArray,0,compressedRLEMetaDataLength); - final ByteBuffer compressedRLEMetaData = Utils.wrap(compressedRLEMetaDataArray); - uncompressedRLEMetaData = Utils.allocateByteBuffer(uncompressedRLEMetaDataLength / 2); + final ByteBuffer compressedRLEMetaData = CompressionUtils.wrap(compressedRLEMetaDataArray); + uncompressedRLEMetaData = CompressionUtils.allocateByteBuffer(uncompressedRLEMetaDataLength / 2); // uncompress using Order 0 and N = Nway uncompressOrder0WayN( compressedRLEMetaData, @@ -393,12 +394,12 @@ private ByteBuffer decodeRLE( final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, final int uncompressedRLEOutputLength) { - final ByteBuffer rleOutBuffer = Utils.allocateByteBuffer(uncompressedRLEOutputLength); + final ByteBuffer rleOutBuffer = CompressionUtils.allocateByteBuffer(uncompressedRLEOutputLength); int j = 0; for(int i = 0; j< uncompressedRLEOutputLength; i++){ final byte sym = inBuffer.get(i); if (rleSymbols[sym & 0xFF]!=0){ - final int run = Utils.readUint7(uncompressedRLEMetaData); + final int run = CompressionUtils.readUint7(uncompressedRLEMetaData); for (int r=0; r<= run; r++){ rleOutBuffer.put(j++, sym); } @@ -409,63 +410,12 @@ private ByteBuffer decodeRLE( return rleOutBuffer; } - private ByteBuffer decodePack( - final ByteBuffer inBuffer, - final byte[] packMappingTable, - final int numSymbols, - final int uncompressedPackOutputLength) { - final ByteBuffer outBufferPack = Utils.allocateByteBuffer(uncompressedPackOutputLength); - int j = 0; - if (numSymbols <= 1) { - for (int i=0; i < uncompressedPackOutputLength; i++){ - outBufferPack.put(i, packMappingTable[0]); - } - } - - // 1 bit per value - else if (numSymbols <= 2) { - int v = 0; - for (int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 8 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 1]); - v >>=1; - } - } - - // 2 bits per value - else if (numSymbols <= 4){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 4 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 3]); - v >>=2; - } - } - - // 4 bits per value - else if (numSymbols <= 16){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 2 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 15]); - v >>=4; - } - } - return outBufferPack; - } - private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ final int numInterleaveStreams = inBuffer.get() & 0xFF; // read lengths of compressed interleaved streams for ( int j=0; j { // Stripe flag is not implemented in the write implementation ///////////////////////////////////////////////////////////////////////////////////////////////// - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It compresses the data in the inBuffer, leaving it consumed. @@ -24,14 +25,14 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } - final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inBuffer.remaining()); + final ByteBuffer outBuffer = CompressionUtils.allocateOutputBuffer(inBuffer.remaining()); final int formatFlags = ransNx16Params.getFormatFlags(); outBuffer.put((byte) (formatFlags)); // one byte for formatFlags // NoSize if (!ransNx16Params.isNosz()) { // original size is not recorded - Utils.writeUint7(inBuffer.remaining(),outBuffer); + CompressionUtils.writeUint7(inBuffer.remaining(),outBuffer); } ByteBuffer inputBuffer = inBuffer; @@ -59,7 +60,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // skip Packing if numSymbols = 0 or numSymbols > 16 if (numSymbols !=0 && numSymbols <= 16) { - inputBuffer = encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); + inputBuffer = CompressionUtils.encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); } else { // unset pack flag in the first byte of the outBuffer outBuffer.put(0,(byte)(outBuffer.get(0) & ~RANSNx16Params.PACK_FLAG_MASK)); @@ -117,7 +118,7 @@ private void compressOrder0WayN ( } final int prefix_size = outBuffer.position(); final int[] F = buildFrequenciesOrder0(inBuffer); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); // Normalize Frequencies such that sum of Frequencies = 1 << bitsize Utils.normaliseFrequenciesOrder0(F, bitSize); @@ -150,7 +151,7 @@ private void compressOrder0WayN ( for (int r=0; r0){ @@ -198,8 +199,8 @@ private void compressOrder1WayN ( Utils.normaliseFrequenciesOrder1(frequencies, Constants.TOTAL_FREQ_SHIFT); final int prefix_size = outBuffer.position(); - ByteBuffer frequencyTable = Utils.allocateOutputBuffer(1); - final ByteBuffer compressedFrequencyTable = Utils.allocateOutputBuffer(1); + ByteBuffer frequencyTable = CompressionUtils.allocateOutputBuffer(1); + final ByteBuffer compressedFrequencyTable = CompressionUtils.allocateOutputBuffer(1); // uncompressed frequency table final int uncompressedFrequencyTableSize = writeFrequenciesOrder1(frequencyTable,frequencies); @@ -216,7 +217,7 @@ private void compressOrder1WayN ( // TODO: we should work on a more permanent solution for this issue! initializeRANSEncoder(); final int compressedFrequencyTableSize = compressedFrequencyTable.limit(); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); // spec: The order-1 frequency table itself may still be quite large, // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. @@ -224,8 +225,8 @@ private void compressOrder1WayN ( // first byte cp.put((byte) (1 | Constants.TOTAL_FREQ_SHIFT << 4 )); - Utils.writeUint7(uncompressedFrequencyTableSize,cp); - Utils.writeUint7(compressedFrequencyTableSize,cp); + CompressionUtils.writeUint7(uncompressedFrequencyTableSize,cp); + CompressionUtils.writeUint7(compressedFrequencyTableSize,cp); // write bytes from compressedFrequencyTable to cp int i=0; @@ -283,7 +284,7 @@ private void compressOrder1WayN ( } // Slicing is needed for buffer reversing later. - final ByteBuffer ptr = Utils.slice(cp); + final ByteBuffer ptr = CompressionUtils.slice(cp); final RANSEncodingSymbol[][] ransEncodingSymbols = getEncodingSymbols(); final byte[] context = new byte[Nway]; @@ -419,7 +420,7 @@ private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) if (run > 0) { run--; } else { - Utils.writeUint7(F[i][j],cp); + CompressionUtils.writeUint7(F[i][j],cp); if (F[i][j] == 0) { // Count how many more zero-freqs we have for (int k = j+1; k < Constants.NUMBER_OF_SYMBOLS; k++) { @@ -502,7 +503,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // create rleMetaData buffer to store rle metadata. // This buffer will be compressed using compressOrder0WayN towards the end of this method // TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize - final ByteBuffer rleMetaData = Utils.allocateByteBuffer(numRLESymbols+1+inputSize); // rleMetaData + final ByteBuffer rleMetaData = CompressionUtils.allocateByteBuffer(numRLESymbols+1+inputSize); // rleMetaData // write number of symbols that are run length encoded rleMetaData.put((byte) numRLESymbols); @@ -517,7 +518,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // Apply RLE // encodedBuffer -> input src data without repetition - final ByteBuffer encodedBuffer = Utils.allocateByteBuffer(inputSize); // rleInBuffer + final ByteBuffer encodedBuffer = CompressionUtils.allocateByteBuffer(inputSize); // rleInBuffer int encodedBufferIdx = 0; // rleInBufferIndex for (int i = 0; i < inputSize; i++) { @@ -532,7 +533,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff } // write the run value to metadata - Utils.writeUint7(run, rleMetaData); + CompressionUtils.writeUint7(run, rleMetaData); // go to the next element that is not equal to its previous element i += run; @@ -545,15 +546,15 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff rleMetaData.rewind(); // compress the rleMetaData Buffer - final ByteBuffer compressedRleMetaData = Utils.allocateOutputBuffer(rleMetaData.remaining()); + final ByteBuffer compressedRleMetaData = CompressionUtils.allocateOutputBuffer(rleMetaData.remaining()); // compress using Order 0 and N = Nway compressOrder0WayN(rleMetaData, new RANSNx16Params(0x00 | ransNx16Params.getFormatFlags() & RANSNx16Params.N32_FLAG_MASK),compressedRleMetaData); // write to compressedRleMetaData to outBuffer - Utils.writeUint7(rleMetaData.limit()*2, outBuffer); - Utils.writeUint7(encodedBufferIdx, outBuffer); - Utils.writeUint7(compressedRleMetaData.limit(),outBuffer); + CompressionUtils.writeUint7(rleMetaData.limit()*2, outBuffer); + CompressionUtils.writeUint7(encodedBufferIdx, outBuffer); + CompressionUtils.writeUint7(compressedRleMetaData.limit(),outBuffer); outBuffer.put(compressedRleMetaData); @@ -565,67 +566,4 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff return encodedBuffer; } - private ByteBuffer encodePack( - final ByteBuffer inBuffer , - final ByteBuffer outBuffer, - final int[] frequencyTable, - final int[] packMappingTable, - final int numSymbols){ - final int inSize = inBuffer.remaining(); - final ByteBuffer encodedBuffer; - if (numSymbols <= 1) { - encodedBuffer = Utils.allocateByteBuffer(0); - } else if (numSymbols <= 2) { - - // 1 bit per value - final int encodedBufferSize = (int) Math.ceil((double) inSize/8); - encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 8 == 0) { - encodedBuffer.put(++j, (byte) 0); - } - encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); - } - } else if (numSymbols <= 4) { - - // 2 bits per value - final int encodedBufferSize = (int) Math.ceil((double) inSize/4); - encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 4 == 0) { - encodedBuffer.put(++j, (byte) 0); - } - encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); - } - } else { - - // 4 bits per value - final int encodedBufferSize = (int) Math.ceil((double)inSize/2); - encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 2 == 0) { - encodedBuffer.put(++j, (byte) 0); - } - encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); - } - } - - // write numSymbols - outBuffer.put((byte) numSymbols); - - // write mapping table "packMappingTable" that converts mapped value to original symbol - for(int i = 0 ; i < Constants.NUMBER_OF_SYMBOLS; i ++) { - if (frequencyTable[i] > 0) { - outBuffer.put((byte) i); - } - } - - // write the length of data - Utils.writeUint7(encodedBuffer.limit(), outBuffer); - return encodedBuffer; // Here position = 0 since we have always accessed the data buffer using index - } - } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index c8379e8f63..9c1abafc29 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -1,10 +1,10 @@ package htsjdk.samtools.cram; import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.RANSDecode; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSParams; -import htsjdk.samtools.cram.compression.rans.Utils; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; @@ -177,7 +177,7 @@ public void testRANSRoundTrip( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = Utils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = CompressionUtils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); // Stripe Flag is not implemented in RANSNx16 Encoder. // The encoder throws CRAMException if Stripe Flag is used. @@ -208,8 +208,8 @@ public void testDecodeOnly( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = Utils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - final ByteBuffer preCompressedInteropBytes = Utils.wrap(IOUtils.toByteArray(preCompressedInteropStream)); + final ByteBuffer uncompressedInteropBytes = CompressionUtils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer preCompressedInteropBytes = CompressionUtils.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo final ByteBuffer uncompressedHtsjdkBytes = ransDecode.uncompress(preCompressedInteropBytes); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 4e8a1e6bd2..9495d826ef 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -2,6 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; @@ -174,7 +175,7 @@ public void testRoundTripTinySmallLarge( final TestDataEnvelope td, final Integer lowerLimit, final Integer upperLimit){ - final ByteBuffer in = Utils.wrap(td.testArray); + final ByteBuffer in = CompressionUtils.wrap(td.testArray); for (int rawSize = lowerLimit; rawSize < upperLimit; rawSize++) { in.position(0); in.limit(rawSize); @@ -188,7 +189,7 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final RANS4x8Decode ransDecode, final RANS4x8Params params) { final int rawSize = 1001; - final ByteBuffer rawData = Utils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer rawData = CompressionUtils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // minimum prefix len when input is not Empty Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); @@ -202,7 +203,7 @@ public void testRansNx16BuffersMeetBoundaryExpectations( final RANSNx16Decode ransDecode, final RANSNx16Params params) { final int rawSize = 1001; - final ByteBuffer rawData = Utils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer rawData = CompressionUtils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode,ransDecode,params); rawData.rewind(); Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty @@ -227,7 +228,7 @@ public void testRansNx16BuffersMeetBoundaryExpectations( } // if nosz flag is not set, then the uncompressed size is recorded if (!params.isNosz()){ - Assert.assertEquals(Utils.readUint7(compressed), rawSize); + Assert.assertEquals(CompressionUtils.readUint7(compressed), rawSize); } } @@ -237,7 +238,7 @@ public void testRoundTrip( final RANSDecode ransDecode, final RANSParams params, final TestDataEnvelope td) { - ransRoundTrip(ransEncode, ransDecode, params, Utils.wrap(td.testArray)); + ransRoundTrip(ransEncode, ransDecode, params, CompressionUtils.wrap(td.testArray)); } @Test( @@ -251,7 +252,7 @@ public void testRansNx16RejectEncodeStripe( // When td is not Empty, Encoding with Stripe Flag should throw an Exception // as Encode Stripe is not implemented - ransEncode.compress(Utils.wrap(td.testArray), params); + ransEncode.compress(CompressionUtils.wrap(td.testArray), params); } @Test( @@ -261,7 +262,7 @@ public void testRansNx16RejectEncodeStripe( expectedExceptionsMessageRegExp = "Bit Packing is not permitted when number " + "of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: 0") public void testRANSNx16RejectDecodePack(){ - final ByteBuffer compressedData = Utils.wrap(new byte[]{(byte) RANSNx16Params.PACK_FLAG_MASK, (byte) 0x00, (byte) 0x00}); + final ByteBuffer compressedData = CompressionUtils.wrap(new byte[]{(byte) RANSNx16Params.PACK_FLAG_MASK, (byte) 0x00, (byte) 0x00}); final RANSNx16Decode ransDecode = new RANSNx16Decode(); ransDecode.uncompress(compressedData); } From 2db77e989b772418e2775661ee1e0ae43004467b Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 17 Oct 2022 16:03:06 -0400 Subject: [PATCH 64/76] add Range Encode --- .../cram/compression/range/ByteModel.java | 67 +++++ .../cram/compression/range/Constants.java | 7 + .../cram/compression/range/RangeCoder.java | 88 +++++++ .../cram/compression/range/RangeDecode.java | 5 + .../cram/compression/range/RangeEncode.java | 230 ++++++++++++++++++ .../cram/compression/range/RangeParams.java | 81 ++++++ .../cram/compression/range/RangeTest.java | 14 ++ 7 files changed, 492 insertions(+) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/Constants.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java create mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java create mode 100644 src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java new file mode 100644 index 0000000000..865f7f0793 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java @@ -0,0 +1,67 @@ +package htsjdk.samtools.cram.compression.range; + +import com.sun.org.apache.xalan.internal.xsltc.compiler.*; + +import java.nio.*; + +public class ByteModel { + // Is this analogous to Arithmetic Decoder in rans + + public int totalFrequency; + public int maxSymbol; + public final int[] symbols =new int[Constants.NUMBER_OF_SYMBOLS]; + public final int[] frequencies = new int[Constants.NUMBER_OF_SYMBOLS]; + + public ByteModel(final int numSymbols) { + // Spec: ModelCreate method + this.totalFrequency = numSymbols; + this.maxSymbol = numSymbols - 1; + for (int i = 0; i < maxSymbol; i++) { + this.symbols[i] = 0; + this.frequencies[i] = 0; + } + } + + public void modelRenormalize(){ + totalFrequency = 0; + for (int i=0; i < maxSymbol; i++){ + frequencies[i] -= Math.floorDiv(frequencies[i],2); + totalFrequency += frequencies[i]; + } + } + + public void modelEncode(final ByteBuffer outBuffer, RangeCoder rangeCoder, int symbol){ + + // find cumulative frequency + int acc = 0; + int i; + for( i = 0; symbols[i] != symbol; i++){ + acc += frequencies[i]; + } + + // Encode + rangeCoder.rangeEncode(outBuffer, acc, frequencies[i],totalFrequency); + + // Update Model + frequencies[i] += Constants.STEP; + totalFrequency += Constants.STEP; + if (totalFrequency > Constants.MAX_FREQ){ + modelRenormalize(); // How are we ensuring freq of symbol is never 0 + } + + // Keep symbols approximately frequency sorted (ascending order) + symbol = symbols[i]; + if (i > 0 && frequencies[i] > frequencies[i-1]){ + // swap frequencies + int tmp = frequencies[i]; + frequencies[i] = frequencies[i-1]; + frequencies[i-1]=tmp; + + // swap symbols + tmp = symbols[i]; + symbols[i] = symbols[i-1]; + symbols[i-1] = tmp; + } + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java new file mode 100644 index 0000000000..25066b1d2e --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java @@ -0,0 +1,7 @@ +package htsjdk.samtools.cram.compression.range; + +final public class Constants { + public static final int NUMBER_OF_SYMBOLS = 256; + public static final int MAX_FREQ = ((1<<16)-17); + public static final int STEP = 16; +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java new file mode 100644 index 0000000000..e147b48a22 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -0,0 +1,88 @@ +package htsjdk.samtools.cram.compression.range; + +import java.nio.ByteBuffer; + +public class RangeCoder { + + private int low; + private long range; + private int code; + private int FFnum; + private boolean carry; + private int cache; + + public RangeCoder() { + // Spec: RangeEncodeStart + low = 0; + range = 0xffffffff; // 4 bytes of all 1's (2**32 - 1) + code = 0; + FFnum = 0; + carry = false; + cache = 0; + } + + public int rangeGetFrequency(final int tot_freq){ + range = (long) Math.floor(range / tot_freq); + return (int) Math.floor(code / range); + } + + public void rangeShiftLow(ByteBuffer outBuffer) { + // rangeShiftLow tracks the total number of extra bytes to emit and + // carry indicates whether they are a string of 0xFF or 0x00 values + + // range must be less than (2^24) or (1<<24) or (0x1000000) + // "cache" holds the top byte that will be flushed to the output + + + if ((low < 0xff000000) || carry) { + if (carry == false) { + outBuffer.put((byte) cache); + while (FFnum > 0) { + outBuffer.put((byte) 0xFF); + FFnum--; + } + } else { + outBuffer.put((byte) (cache + 1)); + while (FFnum > 0) { + outBuffer.put((byte) 0x00); + FFnum--; + } + + } + cache = low >> 24; // Copy of top byte ready for next flush + carry = false; + } else { + FFnum++; + } + low = low << 8; + // TODO: is it necessary to do -> low = low >>> 0 // keep "low" positive + // i.e, arithmetic right shift by 0 bits? + } + + public void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){ + int old_low = low; + range = (long) Math.floor(range/tot_freq); + low += sym_low * range; + low >>>=0; // TODO: Inspect this!! Truncate to +ve int so we can spot overflow + range *= sym_freq; + + if (low < old_low) { + carry = true; + } + + // Renormalise if range gets too small + while (range < (1<<24)) { + range <<= 8; // range *= 256 + rangeShiftLow(outBuffer); + } + + } + + public void rangeEncodeEnd(final ByteBuffer outBuffer){ + //TODO: Where is the magic number 5 coming from? + for(int i=0; i<5;i++){ + rangeShiftLow(outBuffer); + } + } + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java new file mode 100644 index 0000000000..add18ce357 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -0,0 +1,5 @@ +package htsjdk.samtools.cram.compression.range; + +public class RangeDecode { + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java new file mode 100644 index 0000000000..a7f99a707e --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java @@ -0,0 +1,230 @@ +package htsjdk.samtools.cram.compression.range; + + +import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.rans.Utils; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class RangeEncode { + + private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); +// private static final int MINIMUM__ORDER_1_SIZE = 4; + + public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangeParams) { + if (inBuffer.remaining() == 0) { + return EMPTY_BUFFER; + } + + final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); + final int formatFlags = rangeParams.getFormatFlags(); + outBuffer.put((byte) (formatFlags)); // one byte for formatFlags + + // NoSize + if (!rangeParams.isNosz()) { + // original size is not recorded + int insize = inBuffer.remaining(); + Utils.writeUint7(insize,outBuffer); + } + + ByteBuffer inputBuffer = inBuffer; + + // Stripe flag is not implemented in the write implementation + if (rangeParams.isStripe()) { + throw new CRAMException("Range Encoding with Stripe Flag is not implemented."); + } + + final RangeParams.ORDER order = rangeParams.getOrder(); + final int e_len = inputBuffer.remaining(); // e_len -> inSize + + // Pack + if (rangeParams.isPack()) { + final int[] frequencyTable = new int[Constants.NUMBER_OF_SYMBOLS]; + final int inSize = inputBuffer.remaining(); + for (int i = 0; i < inSize; i ++) { + frequencyTable[inputBuffer.get(i) & 0xFF]++; + } + int numSymbols = 0; + final int[] packMappingTable = new int[Constants.NUMBER_OF_SYMBOLS]; + for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { + if (frequencyTable[i]>0) { + packMappingTable[i] = numSymbols++; + } + } + + // skip Packing if numSymbols = 0 or numSymbols > 16 + if (numSymbols !=0 && numSymbols <= 16) { + inputBuffer = encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); + } else { + // unset pack flag in the first byte of the outBuffer + outBuffer.put(0,(byte)(outBuffer.get(0) & ~RangeParams.PACK_FLAG_MASK)); + } + } + + if (rangeParams.isCAT()){ + + } else if (rangeParams.isExternalCompression()){ + + + } else if (rangeParams.isRLE()){ + switch (rangeParams.getOrder()) { + case ZERO: +// return compressRLEOrder0(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + case ONE: +// return compressRLEOrder1(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + } + } else { + switch (rangeParams.getOrder()) { + case ZERO: + return compressOrder0(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + case ONE: +// return compressOrder1(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + } + + } + + + +// if flagsAND Cat then +// data ReadData(len) +// else if flagsAND Ext then +// data DecodeEXT(len) +// else if flagsAND RLE then +// ... +// else .. + + +// // step 1: Encode meta-data +// var pack_meta +// if (flags & ARITH_PACK) +// [pack_meta, src, e_len] = this.encodePack(src) +// +// // step 2: Write any meta data +// if (flags & ARITH_PACK) +// this.stream.WriteStream(pack_meta) + + + + + + + + // temp + return inBuffer; + } + + private ByteBuffer compressOrder0 ( + final ByteBuffer inBuffer, + final RangeParams rangeParams, + final ByteBuffer outBuffer) { + + int maxSymbol = 0; + final int inSize = inBuffer.remaining(); + for (int i = 0; i < inSize; i++){ + if(maxSymbol < inBuffer.get(i)){ + maxSymbol = inBuffer.get(i); + } + } + maxSymbol++; // TODO: Is this correct? Not what spec states!! + + ByteModel byteModel = new ByteModel(maxSymbol); + outBuffer.put((byte) maxSymbol); + + // TODO: should we pass outBuffer to rangecoder? + RangeCoder rangeCoder = new RangeCoder(); + + for (int i=0; i 0) { + outBuffer.put((byte) i); + } + } + + // write the length of data + Utils.writeUint7(data.limit(), outBuffer); + return data; // Here position = 0 since we have always accessed the data buffer using index + } +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java new file mode 100644 index 0000000000..0c0cb193a8 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java @@ -0,0 +1,81 @@ +package htsjdk.samtools.cram.compression.range; + +public class RangeParams { + public static final int ORDER_FLAG_MASK = 0x01; + public static final int EXT_FLAG_MASK = 0x04; + public static final int STRIPE_FLAG_MASK = 0x08; + public static final int NOSZ_FLAG_MASK = 0x10; + public static final int CAT_FLAG_MASK = 0x20; + public static final int RLE_FLAG_MASK = 0x40; + public static final int PACK_FLAG_MASK = 0x80; + + + // format is the first byte of the compressed data stream, + // which consists of all the bit-flags detailing the type of transformations + // and entropy encoders to be combined + private int formatFlags; + + private static final int FORMAT_FLAG_MASK = 0xFF; + + enum ORDER { + ZERO, ONE; + + public static RangeParams.ORDER fromInt(final int orderValue) { + try { + return RangeParams.ORDER.values()[orderValue]; + } catch (final ArrayIndexOutOfBoundsException e) { + throw new IllegalArgumentException("Unknown Range order: " + orderValue, e); + } + } + } + + public RangeParams(int formatFlags) { + this.formatFlags = formatFlags; + } + + @Override + public String toString() { + return "RangeParams{" + "formatFlags=" + formatFlags + "}"; + } + + public int getFormatFlags(){ + // first byte of the encoded stream + return formatFlags & FORMAT_FLAG_MASK; + } + + public ORDER getOrder() { + // Range Order ZERO or ONE encoding + return ORDER.fromInt(formatFlags & ORDER_FLAG_MASK); //convert into order type + } + + public boolean isExternalCompression(){ + // “External” compression via bzip2 + return ((formatFlags & EXT_FLAG_MASK)!=0); + } + + public boolean isStripe(){ + // multiway interleaving of byte streams + return ((formatFlags & STRIPE_FLAG_MASK)!=0); + } + + public boolean isNosz(){ + // original size is not recorded (for use by Stripe) + return ((formatFlags & NOSZ_FLAG_MASK)!=0); + } + + public boolean isCAT(){ + // Data is uncompressed + return ((formatFlags & CAT_FLAG_MASK)!=0); + } + + public boolean isRLE(){ + // Run length encoding, with runs and literals encoded separately + return ((formatFlags & RLE_FLAG_MASK)!=0); + } + + public boolean isPack(){ + // Pack 2, 4, 8 or infinite symbols per byte + return ((formatFlags & PACK_FLAG_MASK)!=0); + } + +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java new file mode 100644 index 0000000000..fab0096806 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java @@ -0,0 +1,14 @@ +package htsjdk.samtools.cram.compression.range; + +import htsjdk.HtsjdkTest; +import org.testng.annotations.Test; + +public class RangeTest extends HtsjdkTest { + + @Test + public void testRoundTrip(){ + + + } + +} \ No newline at end of file From 43a68c90dabad5ff4252809b087ec519eeac823e Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 28 Oct 2022 15:26:48 -0400 Subject: [PATCH 65/76] Fix RangeEncode for order 0 and formatflags=0x00 --- .../cram/compression/range/ByteModel.java | 22 +++++++------ .../cram/compression/range/RangeCoder.java | 27 ++++++++-------- .../cram/compression/range/RangeEncode.java | 12 +++---- .../cram/compression/range/Utils.java | 31 +++++++++++++++++++ .../cram/compression/range/RangeTest.java | 18 +++++++++++ 5 files changed, 79 insertions(+), 31 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/Utils.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java index 865f7f0793..31a8820454 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java @@ -1,24 +1,26 @@ package htsjdk.samtools.cram.compression.range; -import com.sun.org.apache.xalan.internal.xsltc.compiler.*; - -import java.nio.*; +import java.nio.ByteBuffer; public class ByteModel { - // Is this analogous to Arithmetic Decoder in rans - + // spec: To encode any symbol the entropy encoder needs to know + // the frequency of the symbol to encode, + // the cumulative frequencies of all symbols prior to this symbol, + // and the total of all frequencies. public int totalFrequency; public int maxSymbol; - public final int[] symbols =new int[Constants.NUMBER_OF_SYMBOLS]; - public final int[] frequencies = new int[Constants.NUMBER_OF_SYMBOLS]; + public final int[] symbols; + public final int[] frequencies; public ByteModel(final int numSymbols) { // Spec: ModelCreate method this.totalFrequency = numSymbols; this.maxSymbol = numSymbols - 1; - for (int i = 0; i < maxSymbol; i++) { - this.symbols[i] = 0; - this.frequencies[i] = 0; + frequencies = new int[maxSymbol+1]; + symbols = new int[maxSymbol+1]; + for (int i = 0; i <= maxSymbol; i++) { + this.symbols[i] = i; + this.frequencies[i] = 1; } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java index e147b48a22..463b0196f1 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -4,7 +4,7 @@ public class RangeCoder { - private int low; + private long low; private long range; private int code; private int FFnum; @@ -13,12 +13,12 @@ public class RangeCoder { public RangeCoder() { // Spec: RangeEncodeStart - low = 0; - range = 0xffffffff; // 4 bytes of all 1's (2**32 - 1) - code = 0; - FFnum = 0; - carry = false; - cache = 0; + this.low = 0; + this.range = 0xFFFFFFFFL; // 4 bytes of all 1's + this.code = 0; + this.FFnum = 0; + this.carry = false; + this.cache = 0; } public int rangeGetFrequency(final int tot_freq){ @@ -34,7 +34,7 @@ public void rangeShiftLow(ByteBuffer outBuffer) { // "cache" holds the top byte that will be flushed to the output - if ((low < 0xff000000) || carry) { + if ((low < 0xff000000L) || carry) { //TODO: 0xff000000L make this magic number a constant if (carry == false) { outBuffer.put((byte) cache); while (FFnum > 0) { @@ -49,21 +49,20 @@ public void rangeShiftLow(ByteBuffer outBuffer) { } } - cache = low >> 24; // Copy of top byte ready for next flush + cache = (int) (low >> 24); // Copy of top byte ready for next flush carry = false; } else { FFnum++; } - low = low << 8; - // TODO: is it necessary to do -> low = low >>> 0 // keep "low" positive - // i.e, arithmetic right shift by 0 bits? + + low = (low<<8) & (0xFFFFFFFFL); // truncate top byte or keep bottom 4 bytes } public void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){ - int old_low = low; + long old_low = low; range = (long) Math.floor(range/tot_freq); low += sym_low * range; - low >>>=0; // TODO: Inspect this!! Truncate to +ve int so we can spot overflow + low = low & (0xFFFFFFFFL); // keep bottom 4 bytes range *= sym_freq; if (low < old_low) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java index a7f99a707e..e45abf3b45 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java @@ -1,16 +1,12 @@ package htsjdk.samtools.cram.compression.range; - import htsjdk.samtools.cram.CRAMException; -import htsjdk.samtools.cram.compression.rans.Utils; - import java.nio.ByteBuffer; import java.nio.ByteOrder; -public class RangeEncode { +public class RangeEncode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); -// private static final int MINIMUM__ORDER_1_SIZE = 4; public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangeParams) { if (inBuffer.remaining() == 0) { @@ -128,6 +124,7 @@ private ByteBuffer compressOrder0 ( } maxSymbol++; // TODO: Is this correct? Not what spec states!! + // TODO: initialize byteModel -> set and reset symbols? ByteModel byteModel = new ByteModel(maxSymbol); outBuffer.put((byte) maxSymbol); @@ -135,13 +132,14 @@ private ByteBuffer compressOrder0 ( RangeCoder rangeCoder = new RangeCoder(); for (int i=0; i >= 7; + } while (X > 0); + do { + s -= 7; + //writeByte + int s_ = (s > 0) ? 1 : 0; + cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); + } while (s > 0); + } + + public static int readUint7(ByteBuffer cp) { + int i = 0; + int c; + do { + //read byte + c = cp.get(); + i = (i << 7) | (c & 0x7f); + } while ((c & 0x80) != 0); + return i; + } +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java index fab0096806..33f456bd8a 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java @@ -3,11 +3,29 @@ import htsjdk.HtsjdkTest; import org.testng.annotations.Test; +import java.nio.ByteBuffer; + public class RangeTest extends HtsjdkTest { + private static class TestDataEnvelope { + public final byte[] testArray; + public TestDataEnvelope(final byte[] testdata) { + this.testArray = testdata; + } + public String toString() { + return String.format("Array of size %d", testArray.length); + } + } + + @Test public void testRoundTrip(){ + final RangeEncode rangeEncode = new RangeEncode(); + final RangeParams rangeParams = new RangeParams(0); + TestDataEnvelope td = new TestDataEnvelope(new byte[]{0, 1, 2, 3}); + ByteBuffer inputData = ByteBuffer.wrap(td.testArray); + final ByteBuffer outBuffer = rangeEncode.compress(inputData,rangeParams); } From 49eac0ea1d1f193191a13b7d153adccb37a1771d Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 12 Dec 2022 14:44:10 -0500 Subject: [PATCH 66/76] rebase - Add Range Codec, RangeTest, RangeInteropTest for order, rle, stripe, nosize, cat flags --- .../cram/compression/range/ByteModel.java | 61 ++++- .../cram/compression/range/RangeCoder.java | 30 ++- .../cram/compression/range/RangeDecode.java | 247 ++++++++++++++++++ .../cram/compression/range/RangeEncode.java | 181 ++++++++++--- .../cram/compression/range/RangeParams.java | 5 +- .../rans/rans4x8/RANS4x8Decode.java | 2 + .../samtools/cram/CRAMInteropTestUtils.java | 11 +- .../samtools/cram/RangeInteropTest.java | 153 +++++++++++ .../cram/compression/range/RangeTest.java | 195 +++++++++++++- 9 files changed, 831 insertions(+), 54 deletions(-) create mode 100644 src/test/java/htsjdk/samtools/cram/RangeInteropTest.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java index 31a8820454..be32d27613 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java @@ -24,9 +24,58 @@ public ByteModel(final int numSymbols) { } } + // TODO: use this method to reset + public void reset() { + totalFrequency = 0; + for (int i = 0; i <= maxSymbol; i++) { + symbols[i] = 0; + frequencies[i] = 0; + } + // maxSymbol = 0; // TODO: ??? + } + + public int modelDecode(ByteBuffer inBuffer, RangeCoder rangeCoder){ + + // decodes one symbol + final int freq = rangeCoder.rangeGetFrequency(totalFrequency); + int cumulativeFrequency = 0; + int x = 0; + while (cumulativeFrequency + frequencies[x] <= freq){ + cumulativeFrequency += frequencies[x++]; + } + + // update rangecoder + rangeCoder.rangeDecode(inBuffer,cumulativeFrequency,frequencies[x],totalFrequency); + + // update model frequencies + frequencies[x] += Constants.STEP; + totalFrequency += Constants.STEP; + if (totalFrequency > Constants.MAX_FREQ){ + // if totalFrequency is too high, the frequencies are halved, making + // sure to avoid any zero frequencies being created. + modelRenormalize(); + } + + // keep symbols approximately frequency sorted + int symbol = symbols[x]; + if (x > 0 && frequencies[x] > frequencies[x-1]){ + // Swap frequencies[x], frequencies[x-1] + int tmp = frequencies[x]; + frequencies[x] = frequencies[x-1]; + frequencies[x-1] = tmp; + + // Swap symbols[x], symbols[x-1] + tmp = symbols[x]; + symbols[x] = symbols[x-1]; + symbols[x-1] = tmp; + } + return symbol; + } + public void modelRenormalize(){ + // frequencies are halved totalFrequency = 0; - for (int i=0; i < maxSymbol; i++){ + for (int i=0; i <= maxSymbol; i++){ frequencies[i] -= Math.floorDiv(frequencies[i],2); totalFrequency += frequencies[i]; } @@ -34,21 +83,21 @@ public void modelRenormalize(){ public void modelEncode(final ByteBuffer outBuffer, RangeCoder rangeCoder, int symbol){ - // find cumulative frequency - int acc = 0; + // encodes one input symbol + int cumulativeFrequency = 0; int i; for( i = 0; symbols[i] != symbol; i++){ - acc += frequencies[i]; + cumulativeFrequency += frequencies[i]; } // Encode - rangeCoder.rangeEncode(outBuffer, acc, frequencies[i],totalFrequency); + rangeCoder.rangeEncode(outBuffer, cumulativeFrequency, frequencies[i],totalFrequency); // Update Model frequencies[i] += Constants.STEP; totalFrequency += Constants.STEP; if (totalFrequency > Constants.MAX_FREQ){ - modelRenormalize(); // How are we ensuring freq of symbol is never 0 + modelRenormalize(); } // Keep symbols approximately frequency sorted (ascending order) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java index 463b0196f1..98653914bb 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -6,7 +6,7 @@ public class RangeCoder { private long low; private long range; - private int code; + private long code; private int FFnum; private boolean carry; private int cache; @@ -21,6 +21,24 @@ public RangeCoder() { this.cache = 0; } + public void rangeDecodeStart(ByteBuffer inBuffer){ + for (int i = 0; i < 5; i++){ + + // Get next 5 bytes. Ensure it is +ve + code = (code << 8) + (inBuffer.get() & 0xFF); + } + } + + public void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_freq){ + code -= sym_low * range; + range *= sym_freq; + + while (range < (1<<24)) { + range <<= 8; + code = (code << 8) + (inBuffer.get() & 0xFF); // Ensure code is positive + } + } + public int rangeGetFrequency(final int tot_freq){ range = (long) Math.floor(range / tot_freq); return (int) Math.floor(code / range); @@ -33,7 +51,6 @@ public void rangeShiftLow(ByteBuffer outBuffer) { // range must be less than (2^24) or (1<<24) or (0x1000000) // "cache" holds the top byte that will be flushed to the output - if ((low < 0xff000000L) || carry) { //TODO: 0xff000000L make this magic number a constant if (carry == false) { outBuffer.put((byte) cache); @@ -54,15 +71,14 @@ public void rangeShiftLow(ByteBuffer outBuffer) { } else { FFnum++; } - - low = (low<<8) & (0xFFFFFFFFL); // truncate top byte or keep bottom 4 bytes + low = low<<8 & (0xFFFFFFFFL); // force low to be +ve } public void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){ long old_low = low; range = (long) Math.floor(range/tot_freq); low += sym_low * range; - low = low & (0xFFFFFFFFL); // keep bottom 4 bytes + low &= 0xFFFFFFFFL; // keep bottom 4 bytes, shift the top byte out of low range *= sym_freq; if (low < old_low) { @@ -71,7 +87,7 @@ public void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int // Renormalise if range gets too small while (range < (1<<24)) { - range <<= 8; // range *= 256 + range <<= 8; rangeShiftLow(outBuffer); } @@ -79,7 +95,7 @@ public void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int public void rangeEncodeEnd(final ByteBuffer outBuffer){ //TODO: Where is the magic number 5 coming from? - for(int i=0; i<5;i++){ + for(int i = 0; i < 5; i++){ rangeShiftLow(outBuffer); } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index add18ce357..56362cdb0f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -1,5 +1,252 @@ package htsjdk.samtools.cram.compression.range; +import htsjdk.samtools.cram.compression.BZIP2ExternalCompressor; +import htsjdk.samtools.cram.compression.rans.Utils; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + public class RangeDecode { + private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + + public ByteBuffer uncompress(final ByteBuffer inBuffer) { + return uncompressStream(inBuffer, 0); + } + + public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { + if (inBuffer.remaining() == 0) { + return EMPTY_BUFFER; + } + + // TODO: little endian? +// inBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // the first byte of compressed stream gives the formatFlags + final int formatFlags = inBuffer.get() & 0xFF; + final RangeParams rangeParams = new RangeParams(formatFlags); + + // noSz + outSize = rangeParams.isNosz() ? outSize : Utils.readUint7(inBuffer); + + // order + + // stripe + if (rangeParams.isStripe()) { + return decodeStripe(inBuffer, outSize); + } + + // pack + if (rangeParams.isPack()){ + // decode packmeta + } + + // cat + ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + if (rangeParams.isCAT()){ + byte[] data = new byte[outSize]; + inBuffer.get( data,0, outSize); + return ByteBuffer.wrap(data); + } else if (rangeParams.isExternalCompression()){ + uncompressEXT(inBuffer, outBuffer, outSize); + + + } else if (rangeParams.isRLE()){ + switch (rangeParams.getOrder()) { + case ZERO: + uncompressRLEOrder0(inBuffer, outBuffer, outSize); + break; + case ONE: + uncompressRLEOrder1(inBuffer, outBuffer, outSize); + break; + } + } else { + switch (rangeParams.getOrder()) { + case ZERO: + uncompressOrder0(inBuffer, outBuffer, outSize); + break; + case ONE: + uncompressOrder1(inBuffer, outBuffer, outSize); + break; + } + } + + + // pack + + if (rangeParams.isStripe()) { + // + } + + outBuffer.position(0); + return outBuffer; + + } + + private ByteBuffer uncompressOrder0( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int outSize) { + + int maxSymbols = inBuffer.get() & 0xFF; + maxSymbols = maxSymbols==0 ? 256 : maxSymbols; + + final ByteModel byteModel = new ByteModel(maxSymbols); + final RangeCoder rangeCoder = new RangeCoder(); + rangeCoder.rangeDecodeStart(inBuffer); + + for (int i = 0; i < outSize; i++) { + outBuffer.put(i, (byte) byteModel.modelDecode(inBuffer, rangeCoder)); + } + return outBuffer; + } + + private ByteBuffer uncompressOrder1( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int outSize) { + + int maxSymbols = inBuffer.get() & 0xFF; + maxSymbols = maxSymbols==0 ? 256 : maxSymbols; + + final List byteModelList = new ArrayList(); + + for(int i=0;i byteModelRunsList = new ArrayList(258); + for (int i=0; i <=257; i++){ + byteModelRunsList.add(i, new ByteModel(4)); + } + RangeCoder rangeCoder = new RangeCoder(); + rangeCoder.rangeDecodeStart(inBuffer); + + int i = 0; + while (i < outSize) { + outBuffer.put(i,(byte) modelLit.modelDecode(inBuffer, rangeCoder)); + int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder); + int run = part; + int rctx = 256; + while (part == 3) { + part = byteModelRunsList.get(rctx).modelDecode(inBuffer, rangeCoder); + rctx = 257; + run += part; + } + for (int j = 1; j <= run; j++){ + outBuffer.put(i+j, outBuffer.get(i)); + } + i += run+1; + } + return outBuffer; + } + + private ByteBuffer uncompressRLEOrder1( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int outSize) { + + int maxSymbols = inBuffer.get() & 0xFF; + maxSymbols = maxSymbols == 0 ? 256 : maxSymbols; + final List byteModelLitList = new ArrayList(maxSymbols); + for (int i=0; i < maxSymbols; i++) { + byteModelLitList.add(i,new ByteModel(maxSymbols)); + } + final List byteModelRunsList = new ArrayList(258); + for (int i=0; i <=257; i++){ + byteModelRunsList.add(i, new ByteModel(4)); + } + + RangeCoder rangeCoder = new RangeCoder(); + rangeCoder.rangeDecodeStart(inBuffer); + + int last = 0; + int i = 0; + while (i < outSize) { + outBuffer.put(i,(byte) byteModelLitList.get(last).modelDecode(inBuffer, rangeCoder)); + last = outBuffer.get(i) & 0xFF; + int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder); + int run = part; + int rctx = 256; + while (part == 3) { + part = byteModelRunsList.get(rctx).modelDecode(inBuffer, rangeCoder); + rctx = 257; + run += part; + } + for (int j = 1; j <= run; j++){ + outBuffer.put(i+j, outBuffer.get(i)); + } + i += run+1; + } + return outBuffer; + } + + private ByteBuffer uncompressEXT( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int outSize) { + final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor(); + byte[] data = new byte[outSize]; + inBuffer.get( data,0, outSize); + +// inBuffer.get(byte[] dst, inBuffer.position(),inBuffer.remaining()); + + return outBuffer; + } + + private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ + + final int numInterleaveStreams = inBuffer.get() & 0xFF; + + // retrieve lengths of compressed interleaved streams + int[] clen = new int[numInterleaveStreams]; + for ( int j=0; j j){ + ulen[j]++; + } + + T[j] = uncompressStream(inBuffer, ulen[j]); + } + + // Transpose + ByteBuffer out = ByteBuffer.allocate(outSize); + for (int j = 0; j { @@ -14,10 +16,10 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar } final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); + outBuffer.order(ByteOrder.BIG_ENDIAN); final int formatFlags = rangeParams.getFormatFlags(); - outBuffer.put((byte) (formatFlags)); // one byte for formatFlags + outBuffer.put((byte) (formatFlags)); - // NoSize if (!rangeParams.isNosz()) { // original size is not recorded int insize = inBuffer.remaining(); @@ -32,12 +34,11 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar } final RangeParams.ORDER order = rangeParams.getOrder(); - final int e_len = inputBuffer.remaining(); // e_len -> inSize + final int inSize = inputBuffer.remaining(); // e_len -> inSize // Pack if (rangeParams.isPack()) { final int[] frequencyTable = new int[Constants.NUMBER_OF_SYMBOLS]; - final int inSize = inputBuffer.remaining(); for (int i = 0; i < inSize; i ++) { frequencyTable[inputBuffer.get(i) & 0xFF]++; } @@ -60,37 +61,31 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar if (rangeParams.isCAT()){ + // Data is uncompressed + outBuffer.put(inputBuffer); + outBuffer.limit(outBuffer.position()); + outBuffer.rewind(); // set position to 0 + return outBuffer; } else if (rangeParams.isExternalCompression()){ - + // TODO } else if (rangeParams.isRLE()){ switch (rangeParams.getOrder()) { case ZERO: -// return compressRLEOrder0(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + return compressRLEOrder0(inputBuffer, rangeParams, outBuffer); case ONE: -// return compressRLEOrder1(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + return compressRLEOrder1(inputBuffer, rangeParams, outBuffer); } } else { switch (rangeParams.getOrder()) { case ZERO: - return compressOrder0(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + return compressOrder0(inputBuffer, rangeParams, outBuffer); case ONE: -// return compressOrder1(inputBuffer, rangeParams, outBuffer); //src, e_len, this.stream + return compressOrder1(inputBuffer, rangeParams, outBuffer); } } - - -// if flagsAND Cat then -// data ReadData(len) -// else if flagsAND Ext then -// data DecodeEXT(len) -// else if flagsAND RLE then -// ... -// else .. - - // // step 1: Encode meta-data // var pack_meta // if (flags & ARITH_PACK) @@ -100,13 +95,6 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar // if (flags & ARITH_PACK) // this.stream.WriteStream(pack_meta) - - - - - - - // temp return inBuffer; } @@ -118,8 +106,8 @@ private ByteBuffer compressOrder0 ( int maxSymbol = 0; final int inSize = inBuffer.remaining(); for (int i = 0; i < inSize; i++){ - if(maxSymbol < inBuffer.get(i)){ - maxSymbol = inBuffer.get(i); + if(maxSymbol < (inBuffer.get(i) & 0xFF)){ + maxSymbol = inBuffer.get(i) & 0xFF; } } maxSymbol++; // TODO: Is this correct? Not what spec states!! @@ -127,12 +115,45 @@ private ByteBuffer compressOrder0 ( // TODO: initialize byteModel -> set and reset symbols? ByteModel byteModel = new ByteModel(maxSymbol); outBuffer.put((byte) maxSymbol); + RangeCoder rangeCoder = new RangeCoder(); + for (int i = 0; i < inSize; i++){ + byteModel.modelEncode(outBuffer,rangeCoder,inBuffer.get(i)&0xFF); + } + rangeCoder.rangeEncodeEnd(outBuffer); + outBuffer.limit(outBuffer.position()); + outBuffer.rewind(); + return outBuffer; + } + + private ByteBuffer compressOrder1 ( + final ByteBuffer inBuffer, + final RangeParams rangeParams, + final ByteBuffer outBuffer) { + int maxSymbol = 0; + final int inSize = inBuffer.remaining(); + for (int i = 0; i < inSize; i++){ + if(maxSymbol < (inBuffer.get(i) & 0xFF)){ + maxSymbol = inBuffer.get(i) & 0xFF; + } + } + maxSymbol++; // TODO: Is this correct? Not what spec states!! + + final List byteModelList = new ArrayList(); + + // TODO: initialize byteModel -> set and reset symbols? + + for(int i=0;i byteModelRunsList = new ArrayList(258); + for (int i=0; i <= 257; i++){ + byteModelRunsList.add(i,new ByteModel(4)); + } + outBuffer.put((byte)maxSymbols); + RangeCoder rangeCoder = new RangeCoder(); + int i = 0; + while (i < inSize) { + modelLit.modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); + int run = 1; + while (i+run < inSize && (inBuffer.get(i+run) & 0xFF)== (inBuffer.get(i) & 0xFF)){ + run++; + } + run--; // Check this!! + int rctx = inBuffer.get(i) & 0xFF; + int last = inBuffer.get(i) & 0xFF; + i += run+1; + int part = run >=3 ? 3 : run; + byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part); + run -= part; + rctx = 256; + while (part == 3){ + part = run >=3 ? 3 : run; + byteModelRunsList.get(rctx).modelEncode(outBuffer,rangeCoder,part); + rctx = 257; + run -= part; + } + } + rangeCoder.rangeEncodeEnd(outBuffer); + outBuffer.limit(outBuffer.position()); + outBuffer.rewind(); + return outBuffer; + } + + private ByteBuffer compressRLEOrder1 ( + final ByteBuffer inBuffer, + final RangeParams rangeParams, + final ByteBuffer outBuffer) { + int maxSymbols = 0; + int inSize = inBuffer.remaining(); + for (int i = 0; i < inSize; i++) { + if (maxSymbols < (inBuffer.get(i) & 0xFF)) { + maxSymbols = inBuffer.get(i) & 0xFF; + } + } + maxSymbols++; // FIXME not what spec states! + final List modelLitList = new ArrayList<>(maxSymbols); + for (int i = 0; i < maxSymbols; i++){ + modelLitList.add(i, new ByteModel(maxSymbols)); + } + final List byteModelRunsList = new ArrayList(258); + for (int i=0; i <= 257; i++){ + byteModelRunsList.add(i,new ByteModel(4)); + } + outBuffer.put((byte)maxSymbols); + RangeCoder rangeCoder = new RangeCoder(); + int i = 0; + int last = 0; + while (i < inSize) { + modelLitList.get(last).modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); + int run = 1; + while (i+run < inSize && inBuffer.get(i+run) == inBuffer.get(i)){ + run++; + } + run--; // Check this!! + int rctx = inBuffer.get(i) & 0xFF; + last = inBuffer.get(i) & 0xFF; + i += run+1; + int part = run >=3 ? 3 : run; + byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part); + run -= part; + rctx = 256; + while (part == 3){ + part = run >=3 ? 3 : run; + byteModelRunsList.get(rctx).modelEncode(outBuffer,rangeCoder,part); + rctx = 257; + run -= part; + } + } + rangeCoder.rangeEncodeEnd(outBuffer); + outBuffer.limit(outBuffer.position()); + outBuffer.rewind(); + return outBuffer; + } protected ByteBuffer allocateOutputBuffer(final int inSize) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java index 0c0cb193a8..b017aa9e54 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java @@ -17,12 +17,13 @@ public class RangeParams { private static final int FORMAT_FLAG_MASK = 0xFF; - enum ORDER { + public enum ORDER { ZERO, ONE; public static RangeParams.ORDER fromInt(final int orderValue) { try { - return RangeParams.ORDER.values()[orderValue]; + ORDER[] x = ORDER.values(); + return x[orderValue]; } catch (final ArrayIndexOutOfBoundsException e) { throw new IllegalArgumentException("Unknown Range order: " + orderValue, e); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 25b9b773e9..e2bbc4f290 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -184,6 +184,8 @@ private void uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuffer ou final byte c7 = D[0xFF & l7].reverseLookup[Utils.RANSGetCumulativeFrequency(rans7, Constants.TOTAL_FREQ_SHIFT)]; outBuffer.put(i7, c7); rans7 = syms[0xFF & l7][0xFF & c7].advanceSymbol4x8(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); + // TODO: the spec specifies renormalize here + // rans7 = Utils.RANSDecodeRenormalize4x8(rans7, inBuffer); l7 = c7; } } diff --git a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index 71b7b39555..1c990e65c4 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -33,8 +33,17 @@ public static Path getInteropTestDataLocation() { return Paths.get(INTEROP_TEST_FILES_PATH); } + // Given a test file name and the codec, map it to the corresponding compressed file path + public static final Path getCompressedCodecPath(final String codecType, final Path uncompressedInteropPath, int formatFlags) { + + // Example uncompressedInteropPath: q4, codecType: r4x16, formatFlags: 193 => compressedFileName: r4x16/q4.193 + // the substring after "." in the compressedFileName is the formatFlags or the first byte of the compressed stream + final String compressedFileName = String.format("%s/%s.%s", codecType, uncompressedInteropPath.getFileName(), formatFlags); + return uncompressedInteropPath.getParent().resolve(compressedFileName); + } + // the input files have embedded newlines that the test remove before round-tripping... - protected static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { + public static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { // 1. filters new lines if any. // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java new file mode 100644 index 0000000000..c791f5b6c9 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -0,0 +1,153 @@ +package htsjdk.samtools.cram; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.compression.range.RangeDecode; +import htsjdk.samtools.cram.compression.range.RangeEncode; +import htsjdk.samtools.cram.compression.range.RangeParams; +import org.apache.commons.compress.utils.IOUtils; +import org.testng.Assert; +import org.testng.SkipException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + +public class RangeInteropTest extends HtsjdkTest { + public static final String COMPRESSED_RANGE_DIR = "arith"; + + @DataProvider(name = "allRangeCodecsAndDataForRoundtrip") + public Object[][] getAllRangeCodecsAndDataForRoundtrip() throws IOException { + final List rangeParamsFormatFlagList = Arrays.asList( + 0x00, + RangeParams.ORDER_FLAG_MASK, + RangeParams.RLE_FLAG_MASK, + RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK + ); + final List testCases = new ArrayList<>(); + getInteropRangeTestFiles() + .forEach(path -> + rangeParamsFormatFlagList.stream().map(rangeParamsFormatFlag -> new Object[]{ + path, + new RangeEncode(), + new RangeDecode(), + new RangeParams(rangeParamsFormatFlag), + COMPRESSED_RANGE_DIR + }).forEach(testCases::add)); + return testCases.toArray(new Object[][]{}); + } + + public Object[][] getRangeDecodeOnlyTestData() throws IOException { + final List rangeParamsFormatFlagList = Arrays.asList( + RangeParams.STRIPE_FLAG_MASK, // TODO: doesn't work because pack is not implemented yet!!! + RangeParams.ORDER_FLAG_MASK|RangeParams.STRIPE_FLAG_MASK); + final List testCases = new ArrayList<>(); + getInteropRangeTestFiles() + .forEach(path -> + rangeParamsFormatFlagList.stream().map(rangeParamsFormatFlag -> new Object[]{ + path, + new RangeEncode(), + new RangeDecode(), + new RangeParams(rangeParamsFormatFlag), + COMPRESSED_RANGE_DIR + }).forEach(testCases::add)); + return testCases.toArray(new Object[][]{}); + } + + @DataProvider(name = "allRangeCodecsAndData") + public Object[][] getAllRangeCodecs() throws IOException { + + // params: + // uncompressed testfile path, Range encoder, Range decoder, + // Range params, compressed testfile directory name + return Stream.concat(Arrays.stream(getAllRangeCodecsAndDataForRoundtrip()), Arrays.stream(getRangeDecodeOnlyTestData())) + .toArray(Object[][]::new); + } + + @Test(description = "Test if CRAM Interop Test Data is available") + public void testGetHTSCodecsCorpus() { + if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { + throw new SkipException(String.format("CRAM Interop Test Data is not available at %s", + CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH)); + } + } + + @Test ( + dependsOnMethods = "testGetHTSCodecsCorpus", + dataProvider = "allRangeCodecsAndDataForRoundtrip", + description = "Roundtrip using htsjdk Range Codec. Compare the output with the original file" ) + public void testRangeRoundTrip( + final Path uncompressedInteropPath, + final RangeEncode rangeEncode, + final RangeDecode rangeDecode, + final RangeParams params, + final String unusedCompressedDirname) throws IOException { + final Path preCompressedInteropPath = CRAMInteropTestUtils.getCompressedCodecPath(COMPRESSED_RANGE_DIR,uncompressedInteropPath, params.getFormatFlags()); + try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); + final InputStream preCompressedInteropStream = Files.newInputStream(preCompressedInteropPath)) { + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); + final ByteBuffer compressedHtsjdkBytes = rangeEncode.compress(uncompressedInteropBytes, params); + Assert.assertEquals(compressedHtsjdkBytes, preCompressedInteropBytes); + Assert.assertEquals(rangeDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); + } + + } + + @Test ( + dependsOnMethods = "testGetHTSCodecsCorpus", + dataProvider = "allRangeCodecsAndData", + description = "Compress the original file using htsjdk Range Codec and compare it with the existing compressed file. " + + "Uncompress the existing compressed file using htsjdk Range Codec and compare it with the original file.") + public void testRangePreCompressed( + final Path uncompressedInteropPath, + final RangeEncode unused, + final RangeDecode rangeDecode, + final RangeParams params, + final String compressedInteropDirName) throws IOException { + + final Path preCompressedInteropPath = CRAMInteropTestUtils.getCompressedCodecPath(compressedInteropDirName,uncompressedInteropPath, params.getFormatFlags()); + + try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); + final InputStream preCompressedInteropStream = Files.newInputStream(preCompressedInteropPath) + ) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) + // by filtering out the embedded newlines, and then round trip through Range Codec and compare the + // results + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + + final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); + + // Use htsjdk to uncompress the precompressed file from htscodecs repo + final ByteBuffer uncompressedHtsjdkBytes = rangeDecode.uncompress(preCompressedInteropBytes); + + // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo + Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); + } catch (final NoSuchFileException ex){ + throw new SkipException("Skipping testRangePrecompressed as either input file " + + "or precompressed file is missing.", ex); + } + } + + // return a list of all Range test data files in the htscodecs/tests/dat directory + private List getInteropRangeTestFiles() throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), + path -> path.getFileName().startsWith("q4") || + path.getFileName().startsWith("q8") || + path.getFileName().startsWith("qvar") || + path.getFileName().startsWith("q40+dir")) + .forEach(path -> paths.add(path)); + return paths; + } + +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java index 33f456bd8a..97f54cb57c 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java @@ -1,11 +1,23 @@ package htsjdk.samtools.cram.compression.range; import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.util.TestUtil; +import htsjdk.utils.TestNGUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.function.BiFunction; +import java.util.stream.Stream; public class RangeTest extends HtsjdkTest { + private final Random random = new Random(TestUtil.RANDOM_SEED); private static class TestDataEnvelope { public final byte[] testArray; @@ -17,16 +29,185 @@ public String toString() { } } + public Object[][] getRangeEmptyTestData() { + return new Object[][]{ + { new TestDataEnvelope(new byte[]{}) }, + }; + } + + @DataProvider(name = "rangeTestData") + public Object[][] getRangeTestData() { + return new Object[][] { + { new TestDataEnvelope(new byte[] {0}) }, + { new TestDataEnvelope(new byte[] {0, 1}) }, + { new TestDataEnvelope(new byte[] {0, 1, 2}) }, + { new TestDataEnvelope(new byte[] {0, 1, 2, 3}) }, + { new TestDataEnvelope(new byte[1000]) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> (byte) 1)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> Byte.MIN_VALUE)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> Byte.MAX_VALUE)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> (byte) index.intValue())) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> index < n / 2 ? (byte) 0 : (byte) 1)) }, + { new TestDataEnvelope(getNBytesWithValues(1000, (n, index) -> index < n % 2 ? (byte) 0 : (byte) 1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(10 * 1000 * 1000 + 1, 0.01)) }, + }; + } + + public Object[][] getRangeTestDataTinySmallLarge() { + + // params: test data, lower limit, upper limit + return new Object[][]{ + { new TestDataEnvelope(randomBytesFromGeometricDistribution(100, 0.1)), 1, 100 }, // Tiny + { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01)), 4, 1000 }, // Small + { new TestDataEnvelope(randomBytesFromGeometricDistribution(100 * 1000 + 3, 0.01)), 100 * 1000 + 3 - 4, 100 * 1000 + 3 } // Large + }; + } + + @DataProvider(name="rangeCodecs") + public Object[][] getRangeCodecs() { + + // params: RangeEncoder, RangeDecoder, RangeParams + final List rangeParamsFormatFlagList = Arrays.asList( + 0x00, + RangeParams.ORDER_FLAG_MASK, + RangeParams.RLE_FLAG_MASK, + RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.CAT_FLAG_MASK, + RangeParams.CAT_FLAG_MASK | RangeParams.ORDER_FLAG_MASK); + final List testCases = new ArrayList<>(); + for (Integer rangeParamsFormatFlag : rangeParamsFormatFlagList) { + Object[] objects = new Object[]{ + new RangeEncode(), + new RangeDecode(), + new RangeParams(rangeParamsFormatFlag) + }; + testCases.add(objects); + } + return testCases.toArray(new Object[][]{}); + } + + public Object[][] getRangeDecodeOnlyCodecs() { + + // params: Range encoder, Range decoder, Range params + return new Object[][]{ + {new RangeEncode(), new RangeDecode(), new RangeParams(RangeParams.STRIPE_FLAG_MASK)}, + {new RangeEncode(), new RangeDecode(), new RangeParams(RangeParams.ORDER_FLAG_MASK|RangeParams.STRIPE_FLAG_MASK)} + }; + } + + @DataProvider(name="RangeDecodeOnlyAndData") + public Object[][] getRangeDecodeOnlyAndData() { + + // params: Range encoder, Range decoder, Range params, test data + // this data provider provides all the non-empty testdata input for Range codec + return TestNGUtils.cartesianProduct(getRangeDecodeOnlyCodecs(), getRangeTestData()); + } + + @DataProvider(name="allRangeCodecsAndData") + public Object[][] getAllRangeCodecsAndData() { + + // params: RangeEncode, RangeDecode, RangeParams, test data + // this data provider provides all the testdata for all of Range codecs + return Stream.concat( + Arrays.stream(TestNGUtils.cartesianProduct(getRangeCodecs(), getRangeTestData())), + Arrays.stream(TestNGUtils.cartesianProduct(getRangeCodecs(), getRangeEmptyTestData()))) + .toArray(Object[][]::new); + } + + @DataProvider(name="allRangeCodecsAndDataForTinySmallLarge") + public Object[][] allRangeCodecsAndDataForTinySmallLarge() { + + // params: RangeEncode, RangeDecode, RangeParams, test data, lower limit, upper limit + // this data provider provides Tiny, Small and Large testdata for all of Range codecs + return TestNGUtils.cartesianProduct(getRangeCodecs(), getRangeTestDataTinySmallLarge()); + } + + @Test(dataProvider = "allRangeCodecsAndData") + public void testRoundTrip(final RangeEncode rangeEncode, + final RangeDecode rangeDecode, + final RangeParams rangeParams, + final TestDataEnvelope td) { + rangeRoundTrip(rangeEncode, rangeDecode, rangeParams, ByteBuffer.wrap(td.testArray)); + } + + @Test(dataProvider = "allRangeCodecsAndDataForTinySmallLarge") + public void testRoundTripTinySmallLarge( + final RangeEncode rangeEncode, + final RangeDecode rangeDecode, + final RangeParams rangeParams, + final TestDataEnvelope td, + final Integer lowerLimit, + final Integer upperLimit){ + final ByteBuffer in = ByteBuffer.wrap(td.testArray); + for (int size = lowerLimit; size < upperLimit; size++) { + in.position(0); + in.limit(size); + rangeRoundTrip(rangeEncode, rangeDecode, rangeParams, in); + } + } - @Test - public void testRoundTrip(){ - final RangeEncode rangeEncode = new RangeEncode(); - final RangeParams rangeParams = new RangeParams(0); - TestDataEnvelope td = new TestDataEnvelope(new byte[]{0, 1, 2, 3}); + @Test( + dataProvider = "RangeDecodeOnlyAndData", + expectedExceptions = { CRAMException.class }, + expectedExceptionsMessageRegExp = "Range Encoding with Stripe Flag is not implemented.") + public void testRangeEncodeStripe( + final RangeEncode rangeEncode, + final RangeDecode unused, + final RangeParams params, + final TestDataEnvelope td) { - ByteBuffer inputData = ByteBuffer.wrap(td.testArray); - final ByteBuffer outBuffer = rangeEncode.compress(inputData,rangeParams); + // When td is not Empty, Encoding with Stripe Flag should throw an Exception + // as Encode Stripe is not implemented + final ByteBuffer compressed = rangeEncode.compress(ByteBuffer.wrap(td.testArray), params); + } + + // testRangeBuffersMeetBoundaryExpectations + // testRangeHeader + // testRangeEncodeStripe + + private static void rangeRoundTrip( + final RangeEncode rangeEncode, + final RangeDecode rangeDecode, + final RangeParams rangeParams, + final ByteBuffer data) { + final ByteBuffer compressed = rangeEncode.compress(data, rangeParams); + final ByteBuffer uncompressed = rangeDecode.uncompress(compressed); + data.rewind(); + Assert.assertEquals(data, uncompressed); + } + +// TODO: Add to utils + private byte[] getNBytesWithValues(final int n, final BiFunction valueForIndex) { + final byte[] data = new byte[n]; + for (int i = 0; i < data.length; i++) { + data[i] = valueForIndex.apply(n, i); + } + return data; + } + // TODO: Add to utils + private byte[] randomBytesFromGeometricDistribution(final int size, final double p) { + final byte[] data = new byte[size]; + for (int i = 0; i < data.length; i++) { + data[i] = drawByteFromGeometricDistribution(p); + } + return data; + } + /** + * A crude implementation of RNG for sampling geometric distribution. The + * value returned is offset by -1 to include zero. For testing purposes + * only, no refunds! + * + * @param probability the probability of success + * @return an almost random byte value. + */ + // TODO: Add to utils + private byte drawByteFromGeometricDistribution(final double probability) { + final double rand = random.nextDouble(); + final double g = Math.ceil(Math.log(1 - rand) / Math.log(1 - probability)) - 1; + return (byte) g; } } \ No newline at end of file From 7b454d6483b24de26b696bd7bd9f2652dd195b3a Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 14 Dec 2022 13:53:57 -0500 Subject: [PATCH 67/76] Add uncompressEXT and decodePack to RangeDecode --- .../cram/compression/range/RangeDecode.java | 95 +++++++++++++++---- .../samtools/cram/RangeInteropTest.java | 1 + 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index 56362cdb0f..78462424c4 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.compression.range; +import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.BZIP2ExternalCompressor; import htsjdk.samtools.cram.compression.rans.Utils; @@ -30,16 +31,30 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { // noSz outSize = rangeParams.isNosz() ? outSize : Utils.readUint7(inBuffer); - // order - // stripe if (rangeParams.isStripe()) { return decodeStripe(inBuffer, outSize); } // pack + // if pack, get pack metadata, which will be used later to decode packed data + int packDataLength = 0; + int numSymbols = 0; + int[] packMappingTable = new int[0]; if (rangeParams.isPack()){ - // decode packmeta + packDataLength = outSize; + numSymbols = inBuffer.get() & 0xFF; + + // if (numSymbols > 16 or numSymbols==0), raise exception + if (numSymbols <= 16 && numSymbols!=0) { + packMappingTable = new int[numSymbols]; + for (int i = 0; i < numSymbols; i++) { + packMappingTable[i] = inBuffer.get() & 0xFF; + } + outSize = Utils.readUint7(inBuffer); + } else { + throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: " + numSymbols); + } } // cat @@ -49,9 +64,9 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { inBuffer.get( data,0, outSize); return ByteBuffer.wrap(data); } else if (rangeParams.isExternalCompression()){ - uncompressEXT(inBuffer, outBuffer, outSize); - - + byte[] extCompressedBytes = new byte[inBuffer.remaining()]; + inBuffer.get( extCompressedBytes,inBuffer.position(), inBuffer.remaining()); + uncompressEXT(extCompressedBytes, outBuffer); } else if (rangeParams.isRLE()){ switch (rangeParams.getOrder()) { case ZERO: @@ -72,11 +87,9 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { } } - - // pack - - if (rangeParams.isStripe()) { - // + // if pack, then decodePack + if (rangeParams.isPack() && packMappingTable.length > 0) { + outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } outBuffer.position(0); @@ -203,16 +216,61 @@ private ByteBuffer uncompressRLEOrder1( } private ByteBuffer uncompressEXT( - final ByteBuffer inBuffer, - final ByteBuffer outBuffer, - final int outSize) { + final byte[] extCompressedBytes, + final ByteBuffer outBuffer) { final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor(); - byte[] data = new byte[outSize]; - inBuffer.get( data,0, outSize); + final byte [] extUncompressedBytes = compressor.uncompress(extCompressedBytes); + outBuffer.put(extUncompressedBytes); + return outBuffer; + } -// inBuffer.get(byte[] dst, inBuffer.position(),inBuffer.remaining()); + private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMappingTable, int numSymbols, int uncompressedPackOutputLength) { + ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); + int j = 0; - return outBuffer; + if (numSymbols <= 1) { + for (int i=0; i < uncompressedPackOutputLength; i++){ + outBufferPack.put(i, (byte) packMappingTable[0]); + } + } + + // 1 bit per value + else if (numSymbols <= 2) { + int v = 0; + for (int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 8 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, (byte) packMappingTable[v & 1]); + v >>=1; + } + } + + // 2 bits per value + else if (numSymbols <= 4){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 4 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, (byte) packMappingTable[v & 3]); + v >>=2; + } + } + + // 4 bits per value + else if (numSymbols <= 16){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 2 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, (byte) packMappingTable[v & 15]); + v >>=4; + } + } + inBuffer = outBufferPack; + return inBuffer; } private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ @@ -245,7 +303,6 @@ private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ out.put((i*numInterleaveStreams)+j, T[j].get(i)); } } - return out; } diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index c791f5b6c9..1585c30fb0 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -31,6 +31,7 @@ public Object[][] getAllRangeCodecsAndDataForRoundtrip() throws IOException { RangeParams.ORDER_FLAG_MASK, RangeParams.RLE_FLAG_MASK, RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK + // TODO: No interop test files avaiable for EXT ); final List testCases = new ArrayList<>(); getInteropRangeTestFiles() From 81bcac7fc0ab11919130476ca7895102e68cd7cd Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 16 Dec 2022 14:59:18 -0500 Subject: [PATCH 68/76] add Pack flag to tests --- .../cram/compression/range/RangeDecode.java | 1 - .../samtools/cram/RangeInteropTest.java | 5 +++- .../cram/compression/range/RangeTest.java | 24 ++++++++++++++----- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index 78462424c4..342a62f6d9 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -57,7 +57,6 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { } } - // cat ByteBuffer outBuffer = ByteBuffer.allocate(outSize); if (rangeParams.isCAT()){ byte[] data = new byte[outSize]; diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index 1585c30fb0..a2b53b95c4 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -99,7 +99,10 @@ public void testRangeRoundTrip( final ByteBuffer compressedHtsjdkBytes = rangeEncode.compress(uncompressedInteropBytes, params); Assert.assertEquals(compressedHtsjdkBytes, preCompressedInteropBytes); Assert.assertEquals(rangeDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); - } + } catch (final NoSuchFileException ex){ + throw new SkipException("Skipping testRangeRoundTrip as either input file " + + "or precompressed file is missing.", ex); + } } diff --git a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java index 97f54cb57c..8bce5433b0 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java @@ -75,7 +75,11 @@ public Object[][] getRangeCodecs() { RangeParams.RLE_FLAG_MASK, RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, RangeParams.CAT_FLAG_MASK, - RangeParams.CAT_FLAG_MASK | RangeParams.ORDER_FLAG_MASK); + RangeParams.CAT_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.PACK_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams. ORDER_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK); final List testCases = new ArrayList<>(); for (Integer rangeParamsFormatFlag : rangeParamsFormatFlagList) { Object[] objects = new Object[]{ @@ -89,12 +93,20 @@ public Object[][] getRangeCodecs() { } public Object[][] getRangeDecodeOnlyCodecs() { - // params: Range encoder, Range decoder, Range params - return new Object[][]{ - {new RangeEncode(), new RangeDecode(), new RangeParams(RangeParams.STRIPE_FLAG_MASK)}, - {new RangeEncode(), new RangeDecode(), new RangeParams(RangeParams.ORDER_FLAG_MASK|RangeParams.STRIPE_FLAG_MASK)} - }; + final List rangeParamsFormatFlagList = Arrays.asList( + RangeParams.STRIPE_FLAG_MASK, + RangeParams.STRIPE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK); + final List testCases = new ArrayList<>(); + for (Integer rangeParamsFormatFlag : rangeParamsFormatFlagList) { + Object[] objects = new Object[]{ + new RangeEncode(), + new RangeDecode(), + new RangeParams(rangeParamsFormatFlag) + }; + testCases.add(objects); + } + return testCases.toArray(new Object[][]{}); } @DataProvider(name="RangeDecodeOnlyAndData") From 5de4036ec9ab9d56a2ddda5394c7d0f8915fff7e Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 12 Jan 2023 10:35:39 -0500 Subject: [PATCH 69/76] Add Range encode and decode for EXT flag --- .../cram/compression/range/RangeDecode.java | 11 +++++--- .../cram/compression/range/RangeEncode.java | 26 ++++++++----------- .../cram/compression/range/RangeTest.java | 4 ++- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index 342a62f6d9..ca33d32a62 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -64,7 +64,13 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { return ByteBuffer.wrap(data); } else if (rangeParams.isExternalCompression()){ byte[] extCompressedBytes = new byte[inBuffer.remaining()]; - inBuffer.get( extCompressedBytes,inBuffer.position(), inBuffer.remaining()); + int extCompressedBytesIdx = 0; + int start = inBuffer.position(); + int end = inBuffer.limit(); + for (int i = start; i < end; i++) { + extCompressedBytes[extCompressedBytesIdx] = inBuffer.get(); + extCompressedBytesIdx++; + } uncompressEXT(extCompressedBytes, outBuffer); } else if (rangeParams.isRLE()){ switch (rangeParams.getOrder()) { @@ -90,8 +96,7 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { if (rangeParams.isPack() && packMappingTable.length > 0) { outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } - - outBuffer.position(0); + outBuffer.rewind(); return outBuffer; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java index 21b9f471af..22da461ab7 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.range; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.BZIP2ExternalCompressor; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; @@ -15,7 +16,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar return EMPTY_BUFFER; } - final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); + ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); outBuffer.order(ByteOrder.BIG_ENDIAN); final int formatFlags = rangeParams.getFormatFlags(); outBuffer.put((byte) (formatFlags)); @@ -67,8 +68,14 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar outBuffer.rewind(); // set position to 0 return outBuffer; } else if (rangeParams.isExternalCompression()){ - - // TODO + byte[] rawBytes = new byte[inputBuffer.remaining()]; + inputBuffer.get( rawBytes,inBuffer.position(), inputBuffer.remaining()); + final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor(); + final byte [] extCompressedBytes = compressor.compress(rawBytes); + outBuffer.put(extCompressedBytes); + outBuffer.limit(outBuffer.position()); + outBuffer.rewind(); // set position to 0 + return outBuffer; } else if (rangeParams.isRLE()){ switch (rangeParams.getOrder()) { case ZERO: @@ -85,17 +92,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar } } - -// // step 1: Encode meta-data -// var pack_meta -// if (flags & ARITH_PACK) -// [pack_meta, src, e_len] = this.encodePack(src) -// -// // step 2: Write any meta data -// if (flags & ARITH_PACK) -// this.stream.WriteStream(pack_meta) - - return inBuffer; + return outBuffer; } private ByteBuffer compressOrder0 ( @@ -268,7 +265,6 @@ private ByteBuffer compressRLEOrder1 ( return outBuffer; } - protected ByteBuffer allocateOutputBuffer(final int inSize) { // same as the allocateOutputBuffer in RANS4x8Encode and RANSNx16Encode diff --git a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java index 8bce5433b0..1c3fb865dd 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java @@ -79,7 +79,9 @@ public Object[][] getRangeCodecs() { RangeParams.PACK_FLAG_MASK, RangeParams.PACK_FLAG_MASK | RangeParams. ORDER_FLAG_MASK, RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK, - RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK); + RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.EXT_FLAG_MASK, + RangeParams.EXT_FLAG_MASK | RangeParams.PACK_FLAG_MASK); final List testCases = new ArrayList<>(); for (Integer rangeParamsFormatFlag : rangeParamsFormatFlagList) { Object[] objects = new Object[]{ From fc6227de28cedef0782cabba2653d067324802fc Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 12 Jan 2023 16:20:13 -0500 Subject: [PATCH 70/76] debug spotbugs error --- .../java/htsjdk/samtools/cram/compression/range/ByteModel.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java index be32d27613..86b891ee2e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java @@ -101,7 +101,6 @@ public void modelEncode(final ByteBuffer outBuffer, RangeCoder rangeCoder, int s } // Keep symbols approximately frequency sorted (ascending order) - symbol = symbols[i]; if (i > 0 && frequencies[i] > frequencies[i-1]){ // swap frequencies int tmp = frequencies[i]; From 249db3057e1014b8dfb0a69dac0601484ad757f4 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 21 Mar 2023 14:32:39 -0400 Subject: [PATCH 71/76] debug - add decodePack on top of CAT flag --- .../samtools/cram/compression/range/RangeDecode.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index ca33d32a62..a5d85e0946 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -13,10 +13,10 @@ public class RangeDecode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); public ByteBuffer uncompress(final ByteBuffer inBuffer) { - return uncompressStream(inBuffer, 0); + return uncompress(inBuffer, 0); } - public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { + public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -61,7 +61,7 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { if (rangeParams.isCAT()){ byte[] data = new byte[outSize]; inBuffer.get( data,0, outSize); - return ByteBuffer.wrap(data); + outBuffer = ByteBuffer.wrap(data); } else if (rangeParams.isExternalCompression()){ byte[] extCompressedBytes = new byte[inBuffer.remaining()]; int extCompressedBytesIdx = 0; @@ -297,7 +297,7 @@ private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ ulen[j]++; } - T[j] = uncompressStream(inBuffer, ulen[j]); + T[j] = uncompress(inBuffer, ulen[j]); } // Transpose From e2d5a37092106b71bdeefe689354096ce7119125 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 25 Oct 2023 15:40:50 -0400 Subject: [PATCH 72/76] Addressing format related feedback from RANS PR that applies to Range Codec as well --- .../cram/compression/range/RangeCoder.java | 62 +++--- .../cram/compression/range/RangeDecode.java | 7 +- .../cram/compression/range/RangeEncode.java | 45 ++-- .../samtools/cram/RangeInteropTest.java | 193 ++++++++++-------- 4 files changed, 158 insertions(+), 149 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java index 98653914bb..763a65a3b2 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -11,7 +11,7 @@ public class RangeCoder { private boolean carry; private int cache; - public RangeCoder() { + protected RangeCoder() { // Spec: RangeEncodeStart this.low = 0; this.range = 0xFFFFFFFFL; // 4 bytes of all 1's @@ -21,7 +21,7 @@ public RangeCoder() { this.cache = 0; } - public void rangeDecodeStart(ByteBuffer inBuffer){ + protected void rangeDecodeStart(ByteBuffer inBuffer){ for (int i = 0; i < 5; i++){ // Get next 5 bytes. Ensure it is +ve @@ -29,7 +29,7 @@ public void rangeDecodeStart(ByteBuffer inBuffer){ } } - public void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_freq){ + protected void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_freq){ code -= sym_low * range; range *= sym_freq; @@ -39,12 +39,38 @@ public void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_ } } - public int rangeGetFrequency(final int tot_freq){ + protected int rangeGetFrequency(final int tot_freq){ range = (long) Math.floor(range / tot_freq); return (int) Math.floor(code / range); } - public void rangeShiftLow(ByteBuffer outBuffer) { + protected void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){ + long old_low = low; + range = (long) Math.floor(range/tot_freq); + low += sym_low * range; + low &= 0xFFFFFFFFL; // keep bottom 4 bytes, shift the top byte out of low + range *= sym_freq; + + if (low < old_low) { + carry = true; + } + + // Renormalise if range gets too small + while (range < (1<<24)) { + range <<= 8; + rangeShiftLow(outBuffer); + } + + } + + protected void rangeEncodeEnd(final ByteBuffer outBuffer){ + //TODO: Where is the magic number 5 coming from? + for(int i = 0; i < 5; i++){ + rangeShiftLow(outBuffer); + } + } + + private void rangeShiftLow(ByteBuffer outBuffer) { // rangeShiftLow tracks the total number of extra bytes to emit and // carry indicates whether they are a string of 0xFF or 0x00 values @@ -74,30 +100,4 @@ public void rangeShiftLow(ByteBuffer outBuffer) { low = low<<8 & (0xFFFFFFFFL); // force low to be +ve } - public void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){ - long old_low = low; - range = (long) Math.floor(range/tot_freq); - low += sym_low * range; - low &= 0xFFFFFFFFL; // keep bottom 4 bytes, shift the top byte out of low - range *= sym_freq; - - if (low < old_low) { - carry = true; - } - - // Renormalise if range gets too small - while (range < (1<<24)) { - range <<= 8; - rangeShiftLow(outBuffer); - } - - } - - public void rangeEncodeEnd(final ByteBuffer outBuffer){ - //TODO: Where is the magic number 5 coming from? - for(int i = 0; i < 5; i++){ - rangeShiftLow(outBuffer); - } - } - } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index a5d85e0946..6f892ae667 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -5,6 +5,7 @@ import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.List; @@ -13,17 +14,15 @@ public class RangeDecode { private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); public ByteBuffer uncompress(final ByteBuffer inBuffer) { + inBuffer.order(ByteOrder.LITTLE_ENDIAN); return uncompress(inBuffer, 0); } - public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { + private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } - // TODO: little endian? -// inBuffer.order(ByteOrder.LITTLE_ENDIAN); - // the first byte of compressed stream gives the formatFlags final int formatFlags = inBuffer.get() & 0xFF; final RangeParams rangeParams = new RangeParams(formatFlags); diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java index 22da461ab7..38a9b46768 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java @@ -16,15 +16,14 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar return EMPTY_BUFFER; } - ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); + final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); outBuffer.order(ByteOrder.BIG_ENDIAN); final int formatFlags = rangeParams.getFormatFlags(); outBuffer.put((byte) (formatFlags)); if (!rangeParams.isNosz()) { // original size is not recorded - int insize = inBuffer.remaining(); - Utils.writeUint7(insize,outBuffer); + Utils.writeUint7(inBuffer.remaining(),outBuffer); } ByteBuffer inputBuffer = inBuffer; @@ -68,7 +67,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar outBuffer.rewind(); // set position to 0 return outBuffer; } else if (rangeParams.isExternalCompression()){ - byte[] rawBytes = new byte[inputBuffer.remaining()]; + final byte[] rawBytes = new byte[inputBuffer.remaining()]; inputBuffer.get( rawBytes,inBuffer.position(), inputBuffer.remaining()); final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor(); final byte [] extCompressedBytes = compressor.compress(rawBytes); @@ -79,16 +78,16 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar } else if (rangeParams.isRLE()){ switch (rangeParams.getOrder()) { case ZERO: - return compressRLEOrder0(inputBuffer, rangeParams, outBuffer); + return compressRLEOrder0(inputBuffer, outBuffer); case ONE: - return compressRLEOrder1(inputBuffer, rangeParams, outBuffer); + return compressRLEOrder1(inputBuffer, outBuffer); } } else { switch (rangeParams.getOrder()) { case ZERO: - return compressOrder0(inputBuffer, rangeParams, outBuffer); + return compressOrder0(inputBuffer, outBuffer); case ONE: - return compressOrder1(inputBuffer, rangeParams, outBuffer); + return compressOrder1(inputBuffer, outBuffer); } } @@ -97,7 +96,6 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar private ByteBuffer compressOrder0 ( final ByteBuffer inBuffer, - final RangeParams rangeParams, final ByteBuffer outBuffer) { int maxSymbol = 0; @@ -110,9 +108,9 @@ private ByteBuffer compressOrder0 ( maxSymbol++; // TODO: Is this correct? Not what spec states!! // TODO: initialize byteModel -> set and reset symbols? - ByteModel byteModel = new ByteModel(maxSymbol); + final ByteModel byteModel = new ByteModel(maxSymbol); outBuffer.put((byte) maxSymbol); - RangeCoder rangeCoder = new RangeCoder(); + final RangeCoder rangeCoder = new RangeCoder(); for (int i = 0; i < inSize; i++){ byteModel.modelEncode(outBuffer,rangeCoder,inBuffer.get(i)&0xFF); } @@ -124,7 +122,6 @@ private ByteBuffer compressOrder0 ( private ByteBuffer compressOrder1 ( final ByteBuffer inBuffer, - final RangeParams rangeParams, final ByteBuffer outBuffer) { int maxSymbol = 0; final int inSize = inBuffer.remaining(); @@ -145,7 +142,7 @@ private ByteBuffer compressOrder1 ( outBuffer.put((byte) maxSymbol); // TODO: should we pass outBuffer to rangecoder? - RangeCoder rangeCoder = new RangeCoder(); + final RangeCoder rangeCoder = new RangeCoder(); int last = 0; for (int i = 0; i < inSize; i++ ){ @@ -162,10 +159,9 @@ private ByteBuffer compressOrder1 ( private ByteBuffer compressRLEOrder0 ( final ByteBuffer inBuffer, - final RangeParams rangeParams, final ByteBuffer outBuffer) { int maxSymbols = 0; - int inSize = inBuffer.remaining(); + final int inSize = inBuffer.remaining(); for (int i = 0; i < inSize; i++) { if (maxSymbols < (inBuffer.get(i) & 0xFF)) { maxSymbols = inBuffer.get(i) & 0xFF; @@ -173,14 +169,14 @@ private ByteBuffer compressRLEOrder0 ( } maxSymbols++; // FIXME not what spec states! - ByteModel modelLit = new ByteModel(maxSymbols); + final ByteModel modelLit = new ByteModel(maxSymbols); final List byteModelRunsList = new ArrayList(258); for (int i=0; i <= 257; i++){ byteModelRunsList.add(i,new ByteModel(4)); } outBuffer.put((byte)maxSymbols); - RangeCoder rangeCoder = new RangeCoder(); + final RangeCoder rangeCoder = new RangeCoder(); int i = 0; @@ -192,7 +188,6 @@ private ByteBuffer compressRLEOrder0 ( } run--; // Check this!! int rctx = inBuffer.get(i) & 0xFF; - int last = inBuffer.get(i) & 0xFF; i += run+1; int part = run >=3 ? 3 : run; byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part); @@ -213,10 +208,9 @@ private ByteBuffer compressRLEOrder0 ( private ByteBuffer compressRLEOrder1 ( final ByteBuffer inBuffer, - final RangeParams rangeParams, final ByteBuffer outBuffer) { int maxSymbols = 0; - int inSize = inBuffer.remaining(); + final int inSize = inBuffer.remaining(); for (int i = 0; i < inSize; i++) { if (maxSymbols < (inBuffer.get(i) & 0xFF)) { maxSymbols = inBuffer.get(i) & 0xFF; @@ -233,7 +227,7 @@ private ByteBuffer compressRLEOrder1 ( byteModelRunsList.add(i,new ByteModel(4)); } outBuffer.put((byte)maxSymbols); - RangeCoder rangeCoder = new RangeCoder(); + final RangeCoder rangeCoder = new RangeCoder(); int i = 0; @@ -291,8 +285,7 @@ private ByteBuffer encodePack( } else if (numSymbols <= 2) { // 1 bit per value - int dataSize = (int) Math.ceil((double) inSize/8); - data = ByteBuffer.allocate(dataSize); + data = ByteBuffer.allocate((int) Math.ceil((double) inSize/8)); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 8 == 0) { @@ -303,8 +296,7 @@ private ByteBuffer encodePack( } else if (numSymbols <= 4) { // 2 bits per value - int dataSize = (int) Math.ceil((double) inSize/4); - data = ByteBuffer.allocate(dataSize); + data = ByteBuffer.allocate((int) Math.ceil((double) inSize/4)); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 4 == 0) { @@ -315,8 +307,7 @@ private ByteBuffer encodePack( } else { // 4 bits per value - int dataSize = (int) Math.ceil((double)inSize/2); - data = ByteBuffer.allocate(dataSize); + data = ByteBuffer.allocate((int) Math.ceil((double)inSize/2)); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 2 == 0) { diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index a2b53b95c4..f35139190c 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -10,6 +10,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -17,64 +18,33 @@ import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; -import java.util.stream.Stream; public class RangeInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANGE_DIR = "arith"; - @DataProvider(name = "allRangeCodecsAndDataForRoundtrip") - public Object[][] getAllRangeCodecsAndDataForRoundtrip() throws IOException { - final List rangeParamsFormatFlagList = Arrays.asList( - 0x00, - RangeParams.ORDER_FLAG_MASK, - RangeParams.RLE_FLAG_MASK, - RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK - // TODO: No interop test files avaiable for EXT - ); - final List testCases = new ArrayList<>(); - getInteropRangeTestFiles() - .forEach(path -> - rangeParamsFormatFlagList.stream().map(rangeParamsFormatFlag -> new Object[]{ - path, - new RangeEncode(), - new RangeDecode(), - new RangeParams(rangeParamsFormatFlag), - COMPRESSED_RANGE_DIR - }).forEach(testCases::add)); - return testCases.toArray(new Object[][]{}); - } + @DataProvider(name = "roundTripTestCases") + public Object[][] getRoundTripTestCases() throws IOException { - public Object[][] getRangeDecodeOnlyTestData() throws IOException { - final List rangeParamsFormatFlagList = Arrays.asList( - RangeParams.STRIPE_FLAG_MASK, // TODO: doesn't work because pack is not implemented yet!!! - RangeParams.ORDER_FLAG_MASK|RangeParams.STRIPE_FLAG_MASK); + // params: + // compressed testfile path, uncompressed testfile path, + // Range encoder, Range decoder, Range params final List testCases = new ArrayList<>(); - getInteropRangeTestFiles() - .forEach(path -> - rangeParamsFormatFlagList.stream().map(rangeParamsFormatFlag -> new Object[]{ - path, - new RangeEncode(), - new RangeDecode(), - new RangeParams(rangeParamsFormatFlag), - COMPRESSED_RANGE_DIR - }).forEach(testCases::add)); + for (Path path : getInteropRangeCompressedFilePaths(COMPRESSED_RANGE_DIR)) { + Object[] objects = new Object[]{ + path, + getRangeUnCompressedFilePath(path), + new RangeEncode(), + new RangeDecode(), + getRangeParams(path) + }; + testCases.add(objects); + } return testCases.toArray(new Object[][]{}); } - @DataProvider(name = "allRangeCodecsAndData") - public Object[][] getAllRangeCodecs() throws IOException { - - // params: - // uncompressed testfile path, Range encoder, Range decoder, - // Range params, compressed testfile directory name - return Stream.concat(Arrays.stream(getAllRangeCodecsAndDataForRoundtrip()), Arrays.stream(getRangeDecodeOnlyTestData())) - .toArray(Object[][]::new); - } - @Test(description = "Test if CRAM Interop Test Data is available") - public void testGetHTSCodecsCorpus() { + public void testHtsCodecsCorpusIsAvailable() { if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { throw new SkipException(String.format("CRAM Interop Test Data is not available at %s", CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH)); @@ -82,52 +52,50 @@ public void testGetHTSCodecsCorpus() { } @Test ( - dependsOnMethods = "testGetHTSCodecsCorpus", - dataProvider = "allRangeCodecsAndDataForRoundtrip", + dependsOnMethods = "testHtsCodecsCorpusIsAvailable", + dataProvider = "roundTripTestCases", description = "Roundtrip using htsjdk Range Codec. Compare the output with the original file" ) public void testRangeRoundTrip( - final Path uncompressedInteropPath, + final Path unusedCompressedFilePath, + final Path uncompressedFilePath, final RangeEncode rangeEncode, final RangeDecode rangeDecode, - final RangeParams params, - final String unusedCompressedDirname) throws IOException { - final Path preCompressedInteropPath = CRAMInteropTestUtils.getCompressedCodecPath(COMPRESSED_RANGE_DIR,uncompressedInteropPath, params.getFormatFlags()); - try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); - final InputStream preCompressedInteropStream = Files.newInputStream(preCompressedInteropPath)) { - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); - final ByteBuffer compressedHtsjdkBytes = rangeEncode.compress(uncompressedInteropBytes, params); - Assert.assertEquals(compressedHtsjdkBytes, preCompressedInteropBytes); - Assert.assertEquals(rangeDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); - } catch (final NoSuchFileException ex){ - throw new SkipException("Skipping testRangeRoundTrip as either input file " + - "or precompressed file is missing.", ex); - } + final RangeParams params) throws IOException { + try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedFilePath)) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) + // by filtering out the embedded newlines, and then round trip through Range codec and compare the + // results + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + + if (params.isStripe()) { + Assert.assertThrows(CRAMException.class, () -> rangeEncode.compress(uncompressedInteropBytes, params)); + } else { + final ByteBuffer compressedHtsjdkBytes = rangeEncode.compress(uncompressedInteropBytes, params); + uncompressedInteropBytes.rewind(); + Assert.assertEquals(rangeDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); + } + } } @Test ( - dependsOnMethods = "testGetHTSCodecsCorpus", - dataProvider = "allRangeCodecsAndData", - description = "Compress the original file using htsjdk Range Codec and compare it with the existing compressed file. " + - "Uncompress the existing compressed file using htsjdk Range Codec and compare it with the original file.") - public void testRangePreCompressed( + dependsOnMethods = "testHtsCodecsCorpusIsAvailable", + dataProvider = "roundTripTestCases", + description = "Uncompress the existing compressed file using htsjdk Range codec and compare it with the original file.") + public void testDecodeOnly( + final Path compressedFilePath, final Path uncompressedInteropPath, - final RangeEncode unused, + final RangeEncode unusedRangeEncode, final RangeDecode rangeDecode, - final RangeParams params, - final String compressedInteropDirName) throws IOException { - - final Path preCompressedInteropPath = CRAMInteropTestUtils.getCompressedCodecPath(compressedInteropDirName,uncompressedInteropPath, params.getFormatFlags()); - + final RangeParams unusedRangeParams) throws IOException { try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); - final InputStream preCompressedInteropStream = Files.newInputStream(preCompressedInteropPath) + final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) - // by filtering out the embedded newlines, and then round trip through Range Codec and compare the + // by filtering out the embedded newlines, and then round trip through Range codec and compare the // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo @@ -136,22 +104,73 @@ public void testRangePreCompressed( // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); } catch (final NoSuchFileException ex){ - throw new SkipException("Skipping testRangePrecompressed as either input file " + + throw new SkipException("Skipping testDecodeOnly as either input file " + "or precompressed file is missing.", ex); } } - // return a list of all Range test data files in the htscodecs/tests/dat directory - private List getInteropRangeTestFiles() throws IOException { + // the input files have embedded newlines that the test remove before round-tripping... + private final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { + // 1. filters new lines if any. + // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. + // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + int skip = 0; + for (final byte b : rawBytes) { + if (b == '\t'){ + skip = 1; + } + if (b == '\n') { + skip = 0; + } + if (skip == 0 && b !='\n') { + baos.write(b); + } + } + return baos.toByteArray(); + } + } + + // return a list of all encoded test data files in the htscodecs/tests/dat/ directory + private List getInteropRangeCompressedFilePaths(final String compressedDir) throws IOException { final List paths = new ArrayList<>(); Files.newDirectoryStream( - CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), - path -> path.getFileName().startsWith("q4") || - path.getFileName().startsWith("q8") || - path.getFileName().startsWith("qvar") || - path.getFileName().startsWith("q40+dir")) + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat/"+compressedDir), + path -> Files.isRegularFile(path)) .forEach(path -> paths.add(path)); return paths; } + // Given a compressed test file path, return the corresponding uncompressed file path + public static final Path getRangeUnCompressedFilePath(final Path compressedInteropPath) { + String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); + // Example compressedInteropPath: ../dat/r4x8/q4.1 => unCompressedFilePath: ../dat/q4 + return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName); + } + + public static final String getUncompressedFileName(final String compressedFileName) { + // Returns original filename from compressed file name + int lastDotIndex = compressedFileName.lastIndexOf("."); + if (lastDotIndex >= 0) { + return compressedFileName.substring(0, lastDotIndex); + } else { + throw new CRAMException("The format of the compressed File Name is not as expected. " + + "The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ compressedFileName); + } + } + + public static final RangeParams getRangeParams(final Path compressedInteropPath){ + // Returns RangeParams from compressed file path + final String compressedFileName = compressedInteropPath.getFileName().toString(); + final int lastDotIndex = compressedFileName.lastIndexOf("."); + if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { + return new RangeParams(Integer.parseInt(compressedFileName.substring(lastDotIndex + 1))); + } else { + throw new CRAMException("The format of the compressed File Name is not as expected. " + + "The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ compressedFileName); + } + } + } \ No newline at end of file From 58ace69e9b732e75e7730c67d5fd73c6b3c056b4 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 26 Oct 2023 15:23:53 -0400 Subject: [PATCH 73/76] Rebase on RANS branch and use common methods from CRAMInteropTestUtils class --- .../samtools/cram/CRAMInteropTestUtils.java | 11 +-- .../samtools/cram/RangeInteropTest.java | 76 ++----------------- 2 files changed, 9 insertions(+), 78 deletions(-) diff --git a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index 1c990e65c4..71b7b39555 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -33,17 +33,8 @@ public static Path getInteropTestDataLocation() { return Paths.get(INTEROP_TEST_FILES_PATH); } - // Given a test file name and the codec, map it to the corresponding compressed file path - public static final Path getCompressedCodecPath(final String codecType, final Path uncompressedInteropPath, int formatFlags) { - - // Example uncompressedInteropPath: q4, codecType: r4x16, formatFlags: 193 => compressedFileName: r4x16/q4.193 - // the substring after "." in the compressedFileName is the formatFlags or the first byte of the compressed stream - final String compressedFileName = String.format("%s/%s.%s", codecType, uncompressedInteropPath.getFileName(), formatFlags); - return uncompressedInteropPath.getParent().resolve(compressedFileName); - } - // the input files have embedded newlines that the test remove before round-tripping... - public static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { + protected static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { // 1. filters new lines if any. // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index f35139190c..c00e568ad0 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -10,7 +10,6 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -20,6 +19,11 @@ import java.util.ArrayList; import java.util.List; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.filterEmbeddedNewlines; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.getInteropCompressedFilePaths; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.getParamsFormatFlags; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.getUnCompressedFilePath; + public class RangeInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANGE_DIR = "arith"; @@ -30,13 +34,13 @@ public Object[][] getRoundTripTestCases() throws IOException { // compressed testfile path, uncompressed testfile path, // Range encoder, Range decoder, Range params final List testCases = new ArrayList<>(); - for (Path path : getInteropRangeCompressedFilePaths(COMPRESSED_RANGE_DIR)) { + for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) { Object[] objects = new Object[]{ path, - getRangeUnCompressedFilePath(path), + getUnCompressedFilePath(path), new RangeEncode(), new RangeDecode(), - getRangeParams(path) + new RangeParams(getParamsFormatFlags(path)) }; testCases.add(objects); } @@ -109,68 +113,4 @@ public void testDecodeOnly( } } - // the input files have embedded newlines that the test remove before round-tripping... - private final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { - // 1. filters new lines if any. - // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. - // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. - try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - int skip = 0; - for (final byte b : rawBytes) { - if (b == '\t'){ - skip = 1; - } - if (b == '\n') { - skip = 0; - } - if (skip == 0 && b !='\n') { - baos.write(b); - } - } - return baos.toByteArray(); - } - } - - // return a list of all encoded test data files in the htscodecs/tests/dat/ directory - private List getInteropRangeCompressedFilePaths(final String compressedDir) throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat/"+compressedDir), - path -> Files.isRegularFile(path)) - .forEach(path -> paths.add(path)); - return paths; - } - - // Given a compressed test file path, return the corresponding uncompressed file path - public static final Path getRangeUnCompressedFilePath(final Path compressedInteropPath) { - String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); - // Example compressedInteropPath: ../dat/r4x8/q4.1 => unCompressedFilePath: ../dat/q4 - return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName); - } - - public static final String getUncompressedFileName(final String compressedFileName) { - // Returns original filename from compressed file name - int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0) { - return compressedFileName.substring(0, lastDotIndex); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } - } - - public static final RangeParams getRangeParams(final Path compressedInteropPath){ - // Returns RangeParams from compressed file path - final String compressedFileName = compressedInteropPath.getFileName().toString(); - final int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { - return new RangeParams(Integer.parseInt(compressedFileName.substring(lastDotIndex + 1))); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } - } - } \ No newline at end of file From f9b066cc32f4710dfb99357f22c66e01403a70e4 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 4 Jan 2024 12:06:39 -0500 Subject: [PATCH 74/76] Addressing feedback from nov 21 - part 1 --- .../cram/compression/range/ByteModel.java | 20 ++----- .../cram/compression/range/RangeCoder.java | 28 +++++---- .../cram/compression/range/RangeDecode.java | 60 ++----------------- .../cram/compression/range/Utils.java | 52 ++++++++++++++++ .../samtools/cram/RangeInteropTest.java | 26 ++++---- 5 files changed, 91 insertions(+), 95 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java index 86b891ee2e..f2f71c4e2a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java @@ -8,7 +8,7 @@ public class ByteModel { // the cumulative frequencies of all symbols prior to this symbol, // and the total of all frequencies. public int totalFrequency; - public int maxSymbol; + public final int maxSymbol; public final int[] symbols; public final int[] frequencies; @@ -24,17 +24,7 @@ public ByteModel(final int numSymbols) { } } - // TODO: use this method to reset - public void reset() { - totalFrequency = 0; - for (int i = 0; i <= maxSymbol; i++) { - symbols[i] = 0; - frequencies[i] = 0; - } - // maxSymbol = 0; // TODO: ??? - } - - public int modelDecode(ByteBuffer inBuffer, RangeCoder rangeCoder){ + public int modelDecode(final ByteBuffer inBuffer, final RangeCoder rangeCoder){ // decodes one symbol final int freq = rangeCoder.rangeGetFrequency(totalFrequency); @@ -45,7 +35,7 @@ public int modelDecode(ByteBuffer inBuffer, RangeCoder rangeCoder){ } // update rangecoder - rangeCoder.rangeDecode(inBuffer,cumulativeFrequency,frequencies[x],totalFrequency); + rangeCoder.rangeDecode(inBuffer,cumulativeFrequency,frequencies[x]); // update model frequencies frequencies[x] += Constants.STEP; @@ -57,7 +47,7 @@ public int modelDecode(ByteBuffer inBuffer, RangeCoder rangeCoder){ } // keep symbols approximately frequency sorted - int symbol = symbols[x]; + final int symbol = symbols[x]; if (x > 0 && frequencies[x] > frequencies[x-1]){ // Swap frequencies[x], frequencies[x-1] int tmp = frequencies[x]; @@ -81,7 +71,7 @@ public void modelRenormalize(){ } } - public void modelEncode(final ByteBuffer outBuffer, RangeCoder rangeCoder, int symbol){ + public void modelEncode(final ByteBuffer outBuffer, final RangeCoder rangeCoder, final int symbol){ // encodes one input symbol int cumulativeFrequency = 0; diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java index 763a65a3b2..05b1a0f33c 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -21,7 +21,7 @@ protected RangeCoder() { this.cache = 0; } - protected void rangeDecodeStart(ByteBuffer inBuffer){ + protected void rangeDecodeStart(final ByteBuffer inBuffer){ for (int i = 0; i < 5; i++){ // Get next 5 bytes. Ensure it is +ve @@ -29,9 +29,9 @@ protected void rangeDecodeStart(ByteBuffer inBuffer){ } } - protected void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_freq){ - code -= sym_low * range; - range *= sym_freq; + protected void rangeDecode(final ByteBuffer inBuffer, final int cumulativeFrequency, final int symbolFrequency){ + code -= cumulativeFrequency * range; + range *= symbolFrequency; while (range < (1<<24)) { range <<= 8; @@ -39,17 +39,21 @@ protected void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int t } } - protected int rangeGetFrequency(final int tot_freq){ - range = (long) Math.floor(range / tot_freq); + protected int rangeGetFrequency(final int totalFrequency){ + range = (long) Math.floor(range / totalFrequency); return (int) Math.floor(code / range); } - protected void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){ - long old_low = low; - range = (long) Math.floor(range/tot_freq); - low += sym_low * range; + protected void rangeEncode( + final ByteBuffer outBuffer, + final int cumulativeFrequency, + final int symbolFrequency, + final int totalFrequency){ + final long old_low = low; + range = (long) Math.floor(range/totalFrequency); + low += cumulativeFrequency * range; low &= 0xFFFFFFFFL; // keep bottom 4 bytes, shift the top byte out of low - range *= sym_freq; + range *= symbolFrequency; if (low < old_low) { carry = true; @@ -70,7 +74,7 @@ protected void rangeEncodeEnd(final ByteBuffer outBuffer){ } } - private void rangeShiftLow(ByteBuffer outBuffer) { + private void rangeShiftLow(final ByteBuffer outBuffer) { // rangeShiftLow tracks the total number of extra bytes to emit and // carry indicates whether they are a string of 0xFF or 0x00 values diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index 6f892ae667..4a6f367106 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -2,7 +2,6 @@ import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.BZIP2ExternalCompressor; -import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -39,16 +38,16 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { // if pack, get pack metadata, which will be used later to decode packed data int packDataLength = 0; int numSymbols = 0; - int[] packMappingTable = new int[0]; + byte[] packMappingTable = null; if (rangeParams.isPack()){ packDataLength = outSize; numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0), raise exception if (numSymbols <= 16 && numSymbols!=0) { - packMappingTable = new int[numSymbols]; + packMappingTable = new byte[numSymbols]; for (int i = 0; i < numSymbols; i++) { - packMappingTable[i] = inBuffer.get() & 0xFF; + packMappingTable[i] = inBuffer.get(); } outSize = Utils.readUint7(inBuffer); } else { @@ -92,8 +91,8 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { } // if pack, then decodePack - if (rangeParams.isPack() && packMappingTable.length > 0) { - outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); + if (rangeParams.isPack()) { + outBuffer = Utils.decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } outBuffer.rewind(); return outBuffer; @@ -227,55 +226,6 @@ private ByteBuffer uncompressEXT( return outBuffer; } - private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMappingTable, int numSymbols, int uncompressedPackOutputLength) { - ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); - int j = 0; - - if (numSymbols <= 1) { - for (int i=0; i < uncompressedPackOutputLength; i++){ - outBufferPack.put(i, (byte) packMappingTable[0]); - } - } - - // 1 bit per value - else if (numSymbols <= 2) { - int v = 0; - for (int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 8 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, (byte) packMappingTable[v & 1]); - v >>=1; - } - } - - // 2 bits per value - else if (numSymbols <= 4){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 4 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, (byte) packMappingTable[v & 3]); - v >>=2; - } - } - - // 4 bits per value - else if (numSymbols <= 16){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 2 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, (byte) packMappingTable[v & 15]); - v >>=4; - } - } - inBuffer = outBufferPack; - return inBuffer; - } - private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ final int numInterleaveStreams = inBuffer.get() & 0xFF; diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/range/Utils.java index 0f6b1507dd..abb0969320 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/Utils.java @@ -28,4 +28,56 @@ public static int readUint7(ByteBuffer cp) { } while ((c & 0x80) != 0); return i; } + + public static ByteBuffer decodePack( + final ByteBuffer inBuffer, + final byte[] packMappingTable, + final int numSymbols, + final int uncompressedPackOutputLength) { + ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); + int j = 0; + + if (numSymbols <= 1) { + for (int i=0; i < uncompressedPackOutputLength; i++){ + outBufferPack.put(i, packMappingTable[0]); + } + } + + // 1 bit per value + else if (numSymbols <= 2) { + int v = 0; + for (int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 8 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 1]); + v >>=1; + } + } + + // 2 bits per value + else if (numSymbols <= 4){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 4 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 3]); + v >>=2; + } + } + + // 4 bits per value + else if (numSymbols <= 16){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 2 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 15]); + v >>=4; + } + } + return outBufferPack; + } } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index c00e568ad0..1f9547e744 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -19,11 +19,6 @@ import java.util.ArrayList; import java.util.List; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.filterEmbeddedNewlines; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.getInteropCompressedFilePaths; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.getParamsFormatFlags; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.getUnCompressedFilePath; - public class RangeInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANGE_DIR = "arith"; @@ -34,13 +29,13 @@ public Object[][] getRoundTripTestCases() throws IOException { // compressed testfile path, uncompressed testfile path, // Range encoder, Range decoder, Range params final List testCases = new ArrayList<>(); - for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) { + for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) { Object[] objects = new Object[]{ path, - getUnCompressedFilePath(path), + CRAMInteropTestUtils.getUnCompressedFilePath(path), new RangeEncode(), new RangeDecode(), - new RangeParams(getParamsFormatFlags(path)) + new RangeParams(CRAMInteropTestUtils.getParamsFormatFlags(path)) }; testCases.add(objects); } @@ -70,7 +65,7 @@ public void testRangeRoundTrip( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through Range codec and compare the // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); if (params.isStripe()) { Assert.assertThrows(CRAMException.class, () -> rangeEncode.compress(uncompressedInteropBytes, params)); @@ -95,11 +90,16 @@ public void testDecodeOnly( try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { - // preprocess the uncompressed data (to match what the htscodecs-library test harness does) - // by filtering out the embedded newlines, and then round trip through Range codec and compare the - // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + // by filtering out the embedded newlines, and then round trip through Range codec + // and compare the results + + final ByteBuffer uncompressedInteropBytes; + if (uncompressedInteropPath.toString().contains("htscodecs/tests/dat/u")) { + uncompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(uncompressedInteropStream)); + } else { + uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + } final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo From 4a416f763c6550849be08b442daa2c761951c93f Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Wed, 24 Jan 2024 14:47:56 -0500 Subject: [PATCH 75/76] Addressing feedback from nov 21 - part 2 --- .../cram/compression/ExternalCompressor.java | 7 + .../compression/RangeExternalCompressor.java | 61 ++++++ .../cram/compression/range/RangeCoder.java | 10 +- .../cram/compression/range/RangeDecode.java | 45 ++--- .../cram/compression/range/RangeEncode.java | 185 ++++++------------ .../cram/compression/range/Utils.java | 83 -------- .../cram/structure/CompressorCache.java | 5 +- .../block/BlockCompressionMethod.java | 5 +- .../cram/compression/CompressorCacheTest.java | 16 +- .../compression/ExternalCompressionTest.java | 17 +- 10 files changed, 186 insertions(+), 248 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java delete mode 100644 src/main/java/htsjdk/samtools/cram/compression/range/Utils.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java index aabd3bc4cd..5c8f6b34fd 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java @@ -1,5 +1,7 @@ package htsjdk.samtools.cram.compression; +import htsjdk.samtools.cram.compression.range.RangeDecode; +import htsjdk.samtools.cram.compression.range.RangeEncode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; @@ -75,6 +77,11 @@ public static ExternalCompressor getCompressorForMethod( new RANSExternalCompressor(new RANS4x8Encode(), new RANS4x8Decode()) : new RANSExternalCompressor(compressorSpecificArg, new RANS4x8Encode(), new RANS4x8Decode()); + case RANGE: + return compressorSpecificArg == NO_COMPRESSION_ARG ? + new RangeExternalCompressor(new RangeEncode(), new RangeDecode()) : + new RangeExternalCompressor(compressorSpecificArg, new RangeEncode(), new RangeDecode()); + case BZIP2: ValidationUtils.validateArg( compressorSpecificArg == NO_COMPRESSION_ARG, diff --git a/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java new file mode 100644 index 0000000000..1c2a87982c --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java @@ -0,0 +1,61 @@ +package htsjdk.samtools.cram.compression; + +import htsjdk.samtools.cram.compression.range.RangeDecode; +import htsjdk.samtools.cram.compression.range.RangeEncode; +import htsjdk.samtools.cram.compression.range.RangeParams; +import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; + +import java.nio.ByteBuffer; + +public class RangeExternalCompressor extends ExternalCompressor{ + + private final int formatFlags; + private final RangeEncode rangeEncode; + private final RangeDecode rangeDecode; + + public RangeExternalCompressor( + final RangeEncode rangeEncode, + final RangeDecode rangeDecode) { + this(0, rangeEncode, rangeDecode); + } + + public RangeExternalCompressor( + final int formatFlags, + final RangeEncode rangeEncode, + final RangeDecode rangeDecode) { + super(BlockCompressionMethod.RANGE); + this.rangeEncode = rangeEncode; + this.rangeDecode = rangeDecode; + this.formatFlags = formatFlags; + } + + @Override + public byte[] compress(byte[] data) { + final RangeParams params = new RangeParams(formatFlags); + final ByteBuffer buffer = rangeEncode.compress(ByteBuffer.wrap(data), params); + return toByteArray(buffer); + } + + @Override + public byte[] uncompress(byte[] data) { + final ByteBuffer buf = rangeDecode.uncompress(ByteBuffer.wrap(data)); + return toByteArray(buf); + } + + @Override + public String toString() { + return String.format("%s(%s)", this.getMethod(),formatFlags); + } + + private byte[] toByteArray(final ByteBuffer buffer) { + if (buffer.hasArray() && buffer.arrayOffset() == 0 && buffer.array().length == buffer.limit()) { + return buffer.array(); + } + + final byte[] bytes = new byte[buffer.remaining()]; + buffer.get(bytes); + return bytes; + } + + +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java index 05b1a0f33c..022cd106d3 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -4,6 +4,8 @@ public class RangeCoder { + private static final long MAX_RANGE = 0xFFFFFFFFL; + private long low; private long range; private long code; @@ -14,7 +16,7 @@ public class RangeCoder { protected RangeCoder() { // Spec: RangeEncodeStart this.low = 0; - this.range = 0xFFFFFFFFL; // 4 bytes of all 1's + this.range = MAX_RANGE; // 4 bytes of all 1's this.code = 0; this.FFnum = 0; this.carry = false; @@ -23,10 +25,9 @@ protected RangeCoder() { protected void rangeDecodeStart(final ByteBuffer inBuffer){ for (int i = 0; i < 5; i++){ - - // Get next 5 bytes. Ensure it is +ve code = (code << 8) + (inBuffer.get() & 0xFF); } + code &= MAX_RANGE; } protected void rangeDecode(final ByteBuffer inBuffer, final int cumulativeFrequency, final int symbolFrequency){ @@ -68,7 +69,6 @@ protected void rangeEncode( } protected void rangeEncodeEnd(final ByteBuffer outBuffer){ - //TODO: Where is the magic number 5 coming from? for(int i = 0; i < 5; i++){ rangeShiftLow(outBuffer); } @@ -96,7 +96,7 @@ private void rangeShiftLow(final ByteBuffer outBuffer) { } } - cache = (int) (low >> 24); // Copy of top byte ready for next flush + cache = (int) (low >>> 24); // Copy of top byte ready for next flush carry = false; } else { FFnum++; diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index 4a6f367106..c8f7e62127 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -2,6 +2,7 @@ import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.BZIP2ExternalCompressor; +import htsjdk.samtools.cram.compression.CompressionUtils; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -27,7 +28,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { final RangeParams rangeParams = new RangeParams(formatFlags); // noSz - outSize = rangeParams.isNosz() ? outSize : Utils.readUint7(inBuffer); + outSize = rangeParams.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer); // stripe if (rangeParams.isStripe()) { @@ -49,7 +50,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get(); } - outSize = Utils.readUint7(inBuffer); + outSize = CompressionUtils.readUint7(inBuffer); } else { throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: " + numSymbols); } @@ -92,7 +93,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { // if pack, then decodePack if (rangeParams.isPack()) { - outBuffer = Utils.decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); + outBuffer = CompressionUtils.decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } outBuffer.rewind(); return outBuffer; @@ -124,22 +125,16 @@ private ByteBuffer uncompressOrder1( int maxSymbols = inBuffer.get() & 0xFF; maxSymbols = maxSymbols==0 ? 256 : maxSymbols; - - final List byteModelList = new ArrayList(); - + final List byteModelList = new ArrayList(maxSymbols); for(int i=0;i j){ - ulen[j]++; + uncompressedLengths[j]++; } - T[j] = uncompress(inBuffer, ulen[j]); + transposedData[j] = uncompress(inBuffer, uncompressedLengths[j]); } // Transpose - ByteBuffer out = ByteBuffer.allocate(outSize); + final ByteBuffer outBuffer = CompressionUtils.allocateByteBuffer(outSize); for (int j = 0; j { @@ -16,14 +17,14 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar return EMPTY_BUFFER; } - final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); + final ByteBuffer outBuffer = CompressionUtils.allocateOutputBuffer(inBuffer.remaining()); outBuffer.order(ByteOrder.BIG_ENDIAN); final int formatFlags = rangeParams.getFormatFlags(); outBuffer.put((byte) (formatFlags)); if (!rangeParams.isNosz()) { // original size is not recorded - Utils.writeUint7(inBuffer.remaining(),outBuffer); + CompressionUtils.writeUint7(inBuffer.remaining(), outBuffer); } ByteBuffer inputBuffer = inBuffer; @@ -33,75 +34,79 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar throw new CRAMException("Range Encoding with Stripe Flag is not implemented."); } - final RangeParams.ORDER order = rangeParams.getOrder(); final int inSize = inputBuffer.remaining(); // e_len -> inSize // Pack if (rangeParams.isPack()) { final int[] frequencyTable = new int[Constants.NUMBER_OF_SYMBOLS]; - for (int i = 0; i < inSize; i ++) { + for (int i = 0; i < inSize; i++) { frequencyTable[inputBuffer.get(i) & 0xFF]++; } int numSymbols = 0; final int[] packMappingTable = new int[Constants.NUMBER_OF_SYMBOLS]; for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { - if (frequencyTable[i]>0) { + if (frequencyTable[i] > 0) { packMappingTable[i] = numSymbols++; } } // skip Packing if numSymbols = 0 or numSymbols > 16 - if (numSymbols !=0 && numSymbols <= 16) { - inputBuffer = encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); + if (numSymbols != 0 && numSymbols <= 16) { + inputBuffer = CompressionUtils.encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); } else { // unset pack flag in the first byte of the outBuffer - outBuffer.put(0,(byte)(outBuffer.get(0) & ~RangeParams.PACK_FLAG_MASK)); + outBuffer.put(0, (byte) (outBuffer.get(0) & ~RangeParams.PACK_FLAG_MASK)); } } - if (rangeParams.isCAT()){ + if (rangeParams.isCAT()) { // Data is uncompressed outBuffer.put(inputBuffer); outBuffer.limit(outBuffer.position()); outBuffer.rewind(); // set position to 0 - return outBuffer; - } else if (rangeParams.isExternalCompression()){ + } else if (rangeParams.isExternalCompression()) { final byte[] rawBytes = new byte[inputBuffer.remaining()]; - inputBuffer.get( rawBytes,inBuffer.position(), inputBuffer.remaining()); + inputBuffer.get(rawBytes, inBuffer.position(), inputBuffer.remaining()); final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor(); - final byte [] extCompressedBytes = compressor.compress(rawBytes); + final byte[] extCompressedBytes = compressor.compress(rawBytes); outBuffer.put(extCompressedBytes); outBuffer.limit(outBuffer.position()); outBuffer.rewind(); // set position to 0 - return outBuffer; - } else if (rangeParams.isRLE()){ + } else if (rangeParams.isRLE()) { switch (rangeParams.getOrder()) { case ZERO: - return compressRLEOrder0(inputBuffer, outBuffer); + compressRLEOrder0(inputBuffer, outBuffer); + break; case ONE: - return compressRLEOrder1(inputBuffer, outBuffer); + compressRLEOrder1(inputBuffer, outBuffer); + break; + default: + throw new CRAMException("Unknown range order: " + rangeParams.getOrder()); } } else { switch (rangeParams.getOrder()) { case ZERO: - return compressOrder0(inputBuffer, outBuffer); + compressOrder0(inputBuffer, outBuffer); + break; case ONE: - return compressOrder1(inputBuffer, outBuffer); + compressOrder1(inputBuffer, outBuffer); + break; + default: + throw new CRAMException("Unknown range order: " + rangeParams.getOrder()); } - } return outBuffer; } - private ByteBuffer compressOrder0 ( + private void compressOrder0( final ByteBuffer inBuffer, final ByteBuffer outBuffer) { int maxSymbol = 0; final int inSize = inBuffer.remaining(); - for (int i = 0; i < inSize; i++){ - if(maxSymbol < (inBuffer.get(i) & 0xFF)){ + for (int i = 0; i < inSize; i++) { + if (maxSymbol < (inBuffer.get(i) & 0xFF)) { maxSymbol = inBuffer.get(i) & 0xFF; } } @@ -111,22 +116,21 @@ private ByteBuffer compressOrder0 ( final ByteModel byteModel = new ByteModel(maxSymbol); outBuffer.put((byte) maxSymbol); final RangeCoder rangeCoder = new RangeCoder(); - for (int i = 0; i < inSize; i++){ - byteModel.modelEncode(outBuffer,rangeCoder,inBuffer.get(i)&0xFF); + for (int i = 0; i < inSize; i++) { + byteModel.modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); } rangeCoder.rangeEncodeEnd(outBuffer); outBuffer.limit(outBuffer.position()); outBuffer.rewind(); - return outBuffer; } - private ByteBuffer compressOrder1 ( + private void compressOrder1( final ByteBuffer inBuffer, final ByteBuffer outBuffer) { int maxSymbol = 0; final int inSize = inBuffer.remaining(); - for (int i = 0; i < inSize; i++){ - if(maxSymbol < (inBuffer.get(i) & 0xFF)){ + for (int i = 0; i < inSize; i++) { + if (maxSymbol < (inBuffer.get(i) & 0xFF)) { maxSymbol = inBuffer.get(i) & 0xFF; } } @@ -136,8 +140,8 @@ private ByteBuffer compressOrder1 ( // TODO: initialize byteModel -> set and reset symbols? - for(int i=0;i byteModelRunsList = new ArrayList(258); - for (int i=0; i <= 257; i++){ - byteModelRunsList.add(i,new ByteModel(4)); + for (int i = 0; i <= 257; i++) { + byteModelRunsList.add(i, new ByteModel(4)); } - outBuffer.put((byte)maxSymbols); + outBuffer.put((byte) maxSymbols); final RangeCoder rangeCoder = new RangeCoder(); @@ -183,19 +186,19 @@ private ByteBuffer compressRLEOrder0 ( while (i < inSize) { modelLit.modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); int run = 1; - while (i+run < inSize && (inBuffer.get(i+run) & 0xFF)== (inBuffer.get(i) & 0xFF)){ + while (i + run < inSize && (inBuffer.get(i + run) & 0xFF) == (inBuffer.get(i) & 0xFF)) { run++; } run--; // Check this!! int rctx = inBuffer.get(i) & 0xFF; - i += run+1; - int part = run >=3 ? 3 : run; + i += run + 1; + int part = run >= 3 ? 3 : run; byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part); run -= part; rctx = 256; - while (part == 3){ - part = run >=3 ? 3 : run; - byteModelRunsList.get(rctx).modelEncode(outBuffer,rangeCoder,part); + while (part == 3) { + part = run >= 3 ? 3 : run; + byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part); rctx = 257; run -= part; } @@ -203,10 +206,9 @@ private ByteBuffer compressRLEOrder0 ( rangeCoder.rangeEncodeEnd(outBuffer); outBuffer.limit(outBuffer.position()); outBuffer.rewind(); - return outBuffer; } - private ByteBuffer compressRLEOrder1 ( + private void compressRLEOrder1( final ByteBuffer inBuffer, final ByteBuffer outBuffer) { int maxSymbols = 0; @@ -219,14 +221,14 @@ private ByteBuffer compressRLEOrder1 ( maxSymbols++; // FIXME not what spec states! final List modelLitList = new ArrayList<>(maxSymbols); - for (int i = 0; i < maxSymbols; i++){ + for (int i = 0; i < maxSymbols; i++) { modelLitList.add(i, new ByteModel(maxSymbols)); } final List byteModelRunsList = new ArrayList(258); - for (int i=0; i <= 257; i++){ - byteModelRunsList.add(i,new ByteModel(4)); + for (int i = 0; i <= 257; i++) { + byteModelRunsList.add(i, new ByteModel(4)); } - outBuffer.put((byte)maxSymbols); + outBuffer.put((byte) maxSymbols); final RangeCoder rangeCoder = new RangeCoder(); @@ -235,20 +237,20 @@ private ByteBuffer compressRLEOrder1 ( while (i < inSize) { modelLitList.get(last).modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); int run = 1; - while (i+run < inSize && inBuffer.get(i+run) == inBuffer.get(i)){ + while (i + run < inSize && inBuffer.get(i + run) == inBuffer.get(i)) { run++; } run--; // Check this!! int rctx = inBuffer.get(i) & 0xFF; last = inBuffer.get(i) & 0xFF; - i += run+1; - int part = run >=3 ? 3 : run; + i += run + 1; + int part = run >= 3 ? 3 : run; byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part); run -= part; rctx = 256; - while (part == 3){ - part = run >=3 ? 3 : run; - byteModelRunsList.get(rctx).modelEncode(outBuffer,rangeCoder,part); + while (part == 3) { + part = run >= 3 ? 3 : run; + byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part); rctx = 257; run -= part; } @@ -256,79 +258,6 @@ private ByteBuffer compressRLEOrder1 ( rangeCoder.rangeEncodeEnd(outBuffer); outBuffer.limit(outBuffer.position()); outBuffer.rewind(); - return outBuffer; } - protected ByteBuffer allocateOutputBuffer(final int inSize) { - - // same as the allocateOutputBuffer in RANS4x8Encode and RANSNx16Encode - // consider deduplication - final int compressedSize = (int) (1.05 * inSize + 257 * 257 * 3 + 9); - final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize); - if (outputBuffer.remaining() < compressedSize) { - throw new RuntimeException("Failed to allocate sufficient buffer size for Range coder."); - } - outputBuffer.order(ByteOrder.LITTLE_ENDIAN); - return outputBuffer; - } - - private ByteBuffer encodePack( - final ByteBuffer inBuffer , - final ByteBuffer outBuffer, - final int[] frequencyTable, - final int[] packMappingTable, - final int numSymbols){ - final int inSize = inBuffer.remaining(); - ByteBuffer data; - if (numSymbols <= 1) { - data = ByteBuffer.allocate(0); - } else if (numSymbols <= 2) { - - // 1 bit per value - data = ByteBuffer.allocate((int) Math.ceil((double) inSize/8)); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 8 == 0) { - data.put(++j, (byte) 0); - } - data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); - } - } else if (numSymbols <= 4) { - - // 2 bits per value - data = ByteBuffer.allocate((int) Math.ceil((double) inSize/4)); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 4 == 0) { - data.put(++j, (byte) 0); - } - data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); - } - } else { - - // 4 bits per value - data = ByteBuffer.allocate((int) Math.ceil((double)inSize/2)); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 2 == 0) { - data.put(++j, (byte) 0); - } - data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); - } - } - - // write numSymbols - outBuffer.put((byte) numSymbols); - - // write mapping table "packMappingTable" that converts mapped value to original symbol - for(int i = 0 ; i < Constants.NUMBER_OF_SYMBOLS; i ++) { - if (frequencyTable[i] > 0) { - outBuffer.put((byte) i); - } - } - - // write the length of data - Utils.writeUint7(data.limit(), outBuffer); - return data; // Here position = 0 since we have always accessed the data buffer using index - } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/range/Utils.java deleted file mode 100644 index abb0969320..0000000000 --- a/src/main/java/htsjdk/samtools/cram/compression/range/Utils.java +++ /dev/null @@ -1,83 +0,0 @@ -package htsjdk.samtools.cram.compression.range; - -import java.nio.ByteBuffer; - -public class Utils { - public static void writeUint7(int i, ByteBuffer cp) { - int s = 0; - int X = i; - do { - s += 7; - X >>= 7; - } while (X > 0); - do { - s -= 7; - //writeByte - int s_ = (s > 0) ? 1 : 0; - cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); - } while (s > 0); - } - - public static int readUint7(ByteBuffer cp) { - int i = 0; - int c; - do { - //read byte - c = cp.get(); - i = (i << 7) | (c & 0x7f); - } while ((c & 0x80) != 0); - return i; - } - - public static ByteBuffer decodePack( - final ByteBuffer inBuffer, - final byte[] packMappingTable, - final int numSymbols, - final int uncompressedPackOutputLength) { - ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); - int j = 0; - - if (numSymbols <= 1) { - for (int i=0; i < uncompressedPackOutputLength; i++){ - outBufferPack.put(i, packMappingTable[0]); - } - } - - // 1 bit per value - else if (numSymbols <= 2) { - int v = 0; - for (int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 8 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 1]); - v >>=1; - } - } - - // 2 bits per value - else if (numSymbols <= 4){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 4 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 3]); - v >>=2; - } - } - - // 4 bits per value - else if (numSymbols <= 16){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 2 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 15]); - v >>=4; - } - } - return outBufferPack; - } -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java index 4f1f781697..81b4f98199 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java @@ -89,7 +89,8 @@ public ExternalCompressor getCompressorForMethod( ); } return getCachedCompressorForMethod(compressorTuple.a, compressorTuple.b); - + case RANGE: + return getCachedCompressorForMethod(compressionMethod, compressorSpecificArg); default: throw new IllegalArgumentException(String.format("Unknown compression method %s", compressionMethod)); } @@ -104,4 +105,4 @@ private ExternalCompressor getCachedCompressorForMethod(final BlockCompressionMe ); } -} +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java b/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java index f37b82e463..d4b1c8aa7a 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java +++ b/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java @@ -32,7 +32,8 @@ public enum BlockCompressionMethod { GZIP(1), BZIP2(2), LZMA(3), - RANS(4); + RANS(4), + RANGE(5); private final int methodId; @@ -65,4 +66,4 @@ public static BlockCompressionMethod byId(final int id) { private static final Map ID_MAP = Collections.unmodifiableMap(Stream.of(BlockCompressionMethod.values()) .collect(Collectors.toMap(BlockCompressionMethod::getMethodId, Function.identity()))); -} +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/CompressorCacheTest.java b/src/test/java/htsjdk/samtools/cram/compression/CompressorCacheTest.java index 20e84ed22f..a4f684fb11 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/CompressorCacheTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/CompressorCacheTest.java @@ -2,6 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.Defaults; +import htsjdk.samtools.cram.compression.range.RangeParams; import htsjdk.samtools.cram.structure.CompressorCache; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import org.testng.Assert; @@ -30,6 +31,19 @@ public Object[][] cachedCompressorForMethodPositiveTests() { {BlockCompressionMethod.RANS, 1, RANSExternalCompressor.class}, {BlockCompressionMethod.RANS, 0, RANSExternalCompressor.class}, {BlockCompressionMethod.RANS, ExternalCompressor.NO_COMPRESSION_ARG, RANSExternalCompressor.class}, + {BlockCompressionMethod.RANGE, 0x00, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.RLE_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.CAT_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.CAT_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK | RangeParams. ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.EXT_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.EXT_FLAG_MASK | RangeParams.PACK_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, ExternalCompressor.NO_COMPRESSION_ARG, RangeExternalCompressor.class}, }; } @@ -63,4 +77,4 @@ public void testGetCompressorForMethodNegative( final int compressorSpecificArg) { compressorCache.getCompressorForMethod(method, compressorSpecificArg); } -} +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/ExternalCompressionTest.java b/src/test/java/htsjdk/samtools/cram/compression/ExternalCompressionTest.java index 84375ea84a..252a7ef8b2 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/ExternalCompressionTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/ExternalCompressionTest.java @@ -2,6 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.Defaults; +import htsjdk.samtools.cram.compression.range.RangeParams; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -29,6 +30,20 @@ public Object[][] compressorForMethodPositiveTests() { {BlockCompressionMethod.RANS, 1, RANSExternalCompressor.class}, {BlockCompressionMethod.RANS, 0, RANSExternalCompressor.class}, {BlockCompressionMethod.RANS, ExternalCompressor.NO_COMPRESSION_ARG, RANSExternalCompressor.class}, + {BlockCompressionMethod.RANGE, 1, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, 0x00, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.RLE_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.CAT_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.CAT_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK | RangeParams. ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.EXT_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, RangeParams.EXT_FLAG_MASK | RangeParams.PACK_FLAG_MASK, RangeExternalCompressor.class}, + {BlockCompressionMethod.RANGE, ExternalCompressor.NO_COMPRESSION_ARG, RangeExternalCompressor.class}, }; } @@ -82,4 +97,4 @@ public void testBZip2Decompression() throws IOException { Assert.assertEquals(output, "BZip2 worked".getBytes()); } -} +} \ No newline at end of file From 55f6086c408d47500d15aeb969331b41f8b47668 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 29 Jan 2024 15:22:34 -0500 Subject: [PATCH 76/76] Addressing feedback from nov 21, nov 28 - part 3 --- .../cram/compression/CompressionUtils.java | 4 +- .../compression/RANSExternalCompressor.java | 4 +- .../compression/RangeExternalCompressor.java | 4 +- .../cram/compression/range/Constants.java | 1 + .../cram/compression/range/RangeCoder.java | 6 +- .../cram/compression/range/RangeDecode.java | 86 ++++++++++--------- .../cram/compression/range/RangeEncode.java | 24 ++---- .../cram/compression/range/RangeParams.java | 2 +- .../rans/ransnx16/RANSNx16Decode.java | 4 +- .../samtools/cram/CRAMInteropTestUtils.java | 19 ++-- .../htsjdk/samtools/cram/RANSInteropTest.java | 36 +++----- .../samtools/cram/RangeInteropTest.java | 47 ++++++++-- .../cram/compression/range/RangeTest.java | 7 +- 13 files changed, 122 insertions(+), 122 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java index 6d9a725696..d4d1408448 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java @@ -147,15 +147,13 @@ else if (numSymbols <= 16){ return outBufferPack; } - - public static ByteBuffer allocateOutputBuffer(final int inSize) { // This calculation is identical to the one in samtools rANS_static.c // Presumably the frequency table (always big enough for order 1) = 257*257, // then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency), // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); - final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN); + final ByteBuffer outputBuffer = allocateByteBuffer(compressedSize); if (outputBuffer.remaining() < compressedSize) { throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java index 848d7a2906..dd4794b0e3 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java @@ -68,13 +68,13 @@ public RANSExternalCompressor( @Override public byte[] compress(final byte[] data) { final RANS4x8Params params = new RANS4x8Params(order); - final ByteBuffer buffer = ransEncode.compress(ByteBuffer.wrap(data), params); + final ByteBuffer buffer = ransEncode.compress(CompressionUtils.wrap(data), params); return toByteArray(buffer); } @Override public byte[] uncompress(byte[] data) { - final ByteBuffer buf = ransDecode.uncompress(ByteBuffer.wrap(data)); + final ByteBuffer buf = ransDecode.uncompress(CompressionUtils.wrap(data)); return toByteArray(buf); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java b/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java index 1c2a87982c..650ac7c275 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java +++ b/src/main/java/htsjdk/samtools/cram/compression/RangeExternalCompressor.java @@ -32,13 +32,13 @@ public RangeExternalCompressor( @Override public byte[] compress(byte[] data) { final RangeParams params = new RangeParams(formatFlags); - final ByteBuffer buffer = rangeEncode.compress(ByteBuffer.wrap(data), params); + final ByteBuffer buffer = rangeEncode.compress(CompressionUtils.wrap(data), params); return toByteArray(buffer); } @Override public byte[] uncompress(byte[] data) { - final ByteBuffer buf = rangeDecode.uncompress(ByteBuffer.wrap(data)); + final ByteBuffer buf = rangeDecode.uncompress(CompressionUtils.wrap(data)); return toByteArray(buf); } diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java b/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java index 25066b1d2e..e2e941a549 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/Constants.java @@ -4,4 +4,5 @@ final public class Constants { public static final int NUMBER_OF_SYMBOLS = 256; public static final int MAX_FREQ = ((1<<16)-17); public static final int STEP = 16; + public static final long MAX_RANGE = 0xFFFFFFFFL; } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java index 022cd106d3..a7d7b21828 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java @@ -4,8 +4,6 @@ public class RangeCoder { - private static final long MAX_RANGE = 0xFFFFFFFFL; - private long low; private long range; private long code; @@ -16,7 +14,7 @@ public class RangeCoder { protected RangeCoder() { // Spec: RangeEncodeStart this.low = 0; - this.range = MAX_RANGE; // 4 bytes of all 1's + this.range = Constants.MAX_RANGE; // 4 bytes of all 1's this.code = 0; this.FFnum = 0; this.carry = false; @@ -27,7 +25,7 @@ protected void rangeDecodeStart(final ByteBuffer inBuffer){ for (int i = 0; i < 5; i++){ code = (code << 8) + (inBuffer.get() & 0xFF); } - code &= MAX_RANGE; + code &= Constants.MAX_RANGE; } protected void rangeDecode(final ByteBuffer inBuffer, final int cumulativeFrequency, final int symbolFrequency){ diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index c8f7e62127..5987630170 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -11,14 +11,19 @@ public class RangeDecode { - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); + // This method assumes that inBuffer is already rewound. + // It uncompresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the uncompressed data. public ByteBuffer uncompress(final ByteBuffer inBuffer) { + + // For Range decoding, the bytes are read in little endian from the input stream inBuffer.order(ByteOrder.LITTLE_ENDIAN); return uncompress(inBuffer, 0); } - private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { + private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -28,11 +33,11 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { final RangeParams rangeParams = new RangeParams(formatFlags); // noSz - outSize = rangeParams.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer); + int uncompressedSize = rangeParams.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer); // stripe if (rangeParams.isStripe()) { - return decodeStripe(inBuffer, outSize); + return decodeStripe(inBuffer, uncompressedSize); } // pack @@ -41,7 +46,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { int numSymbols = 0; byte[] packMappingTable = null; if (rangeParams.isPack()){ - packDataLength = outSize; + packDataLength = uncompressedSize; numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0), raise exception @@ -50,43 +55,49 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get(); } - outSize = CompressionUtils.readUint7(inBuffer); + uncompressedSize = CompressionUtils.readUint7(inBuffer); } else { - throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: " + numSymbols); + throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. " + + "Number of distinct symbols: " + numSymbols); } } - ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + ByteBuffer outBuffer; if (rangeParams.isCAT()){ - byte[] data = new byte[outSize]; - inBuffer.get( data,0, outSize); - outBuffer = ByteBuffer.wrap(data); + outBuffer = CompressionUtils.slice(inBuffer); + outBuffer.limit(uncompressedSize); + // While resetting the position to the end is not strictly necessary, + // it is being done for the sake of completeness and + // to meet the requirements of the tests that verify the boundary conditions. + inBuffer.position(inBuffer.position()+uncompressedSize); } else if (rangeParams.isExternalCompression()){ - byte[] extCompressedBytes = new byte[inBuffer.remaining()]; + final byte[] extCompressedBytes = new byte[inBuffer.remaining()]; int extCompressedBytesIdx = 0; - int start = inBuffer.position(); - int end = inBuffer.limit(); + final int start = inBuffer.position(); + final int end = inBuffer.limit(); for (int i = start; i < end; i++) { extCompressedBytes[extCompressedBytesIdx] = inBuffer.get(); extCompressedBytesIdx++; } - uncompressEXT(extCompressedBytes, outBuffer); + outBuffer = uncompressEXT(extCompressedBytes); } else if (rangeParams.isRLE()){ + outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); switch (rangeParams.getOrder()) { case ZERO: - uncompressRLEOrder0(inBuffer, outBuffer, outSize); + uncompressRLEOrder0(inBuffer, outBuffer, uncompressedSize); break; case ONE: - uncompressRLEOrder1(inBuffer, outBuffer, outSize); + uncompressRLEOrder1(inBuffer, outBuffer, uncompressedSize); break; } } else { - switch (rangeParams.getOrder()) { + outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); + switch (rangeParams.getOrder()){ case ZERO: - uncompressOrder0(inBuffer, outBuffer, outSize); + uncompressOrder0(inBuffer, outBuffer, uncompressedSize); break; case ONE: - uncompressOrder1(inBuffer, outBuffer, outSize); + uncompressOrder1(inBuffer, outBuffer, uncompressedSize); break; } } @@ -100,7 +111,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { } - private ByteBuffer uncompressOrder0( + private void uncompressOrder0( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { @@ -115,10 +126,9 @@ private ByteBuffer uncompressOrder0( for (int i = 0; i < outSize; i++) { outBuffer.put(i, (byte) byteModel.modelDecode(inBuffer, rangeCoder)); } - return outBuffer; } - private ByteBuffer uncompressOrder1( + private void uncompressOrder1( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { @@ -135,17 +145,16 @@ private ByteBuffer uncompressOrder1( last = byteModelList.get(last).modelDecode(inBuffer, rangeCoder); outBuffer.put(i, (byte) last); } - return outBuffer; } - private ByteBuffer uncompressRLEOrder0( + private void uncompressRLEOrder0( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { int maxSymbols = inBuffer.get() & 0xFF; maxSymbols = maxSymbols == 0 ? 256 : maxSymbols; - ByteModel modelLit = new ByteModel(maxSymbols); + final ByteModel modelLit = new ByteModel(maxSymbols); final List byteModelRunsList = new ArrayList(258); for (int i=0; i <=257; i++){ byteModelRunsList.add(i, new ByteModel(4)); @@ -156,7 +165,8 @@ private ByteBuffer uncompressRLEOrder0( int i = 0; while (i < outSize) { outBuffer.put(i,(byte) modelLit.modelDecode(inBuffer, rangeCoder)); - int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder); + final int last = outBuffer.get(i) & (0xFF); + int part = byteModelRunsList.get(last).modelDecode(inBuffer,rangeCoder); int run = part; int rctx = 256; while (part == 3) { @@ -165,14 +175,13 @@ private ByteBuffer uncompressRLEOrder0( run += part; } for (int j = 1; j <= run; j++){ - outBuffer.put(i+j, outBuffer.get(i)); + outBuffer.put(i+j, (byte) last); } i += run+1; } - return outBuffer; } - private ByteBuffer uncompressRLEOrder1( + private void uncompressRLEOrder1( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize) { @@ -188,7 +197,7 @@ private ByteBuffer uncompressRLEOrder1( byteModelRunsList.add(i, new ByteModel(4)); } - RangeCoder rangeCoder = new RangeCoder(); + final RangeCoder rangeCoder = new RangeCoder(); rangeCoder.rangeDecodeStart(inBuffer); int last = 0; @@ -196,7 +205,7 @@ private ByteBuffer uncompressRLEOrder1( while (i < outSize) { outBuffer.put(i,(byte) byteModelLitList.get(last).modelDecode(inBuffer, rangeCoder)); last = outBuffer.get(i) & 0xFF; - int part = byteModelRunsList.get(outBuffer.get(i)&0xFF).modelDecode(inBuffer,rangeCoder); + int part = byteModelRunsList.get(last).modelDecode(inBuffer,rangeCoder); int run = part; int rctx = 256; while (part == 3) { @@ -205,24 +214,19 @@ private ByteBuffer uncompressRLEOrder1( run += part; } for (int j = 1; j <= run; j++){ - outBuffer.put(i+j, outBuffer.get(i)); + outBuffer.put(i+j, (byte)last); } i += run+1; } - return outBuffer; } - private ByteBuffer uncompressEXT( - final byte[] extCompressedBytes, - final ByteBuffer outBuffer) { + private ByteBuffer uncompressEXT(final byte[] extCompressedBytes) { final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor(); final byte [] extUncompressedBytes = compressor.uncompress(extCompressedBytes); - outBuffer.put(extUncompressedBytes); - return outBuffer; + return CompressionUtils.wrap(extUncompressedBytes); } - private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ - + private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ final int numInterleaveStreams = inBuffer.get() & 0xFF; // read lengths of compressed interleaved streams diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java index 9216d9ee55..437dc7ccbb 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java @@ -10,8 +10,11 @@ public class RangeEncode { - private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); + // This method assumes that inBuffer is already rewound. + // It compresses the data in the inBuffer, leaving it consumed. + // Returns a rewound ByteBuffer containing the compressed data. public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangeParams) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; @@ -110,9 +113,7 @@ private void compressOrder0( maxSymbol = inBuffer.get(i) & 0xFF; } } - maxSymbol++; // TODO: Is this correct? Not what spec states!! - - // TODO: initialize byteModel -> set and reset symbols? + maxSymbol++; final ByteModel byteModel = new ByteModel(maxSymbol); outBuffer.put((byte) maxSymbol); final RangeCoder rangeCoder = new RangeCoder(); @@ -134,28 +135,19 @@ private void compressOrder1( maxSymbol = inBuffer.get(i) & 0xFF; } } - maxSymbol++; // TODO: Is this correct? Not what spec states!! - + maxSymbol++; final List byteModelList = new ArrayList(); - - // TODO: initialize byteModel -> set and reset symbols? - for (int i = 0; i < maxSymbol; i++) { byteModelList.add(i, new ByteModel(maxSymbol)); } outBuffer.put((byte) maxSymbol); - - // TODO: should we pass outBuffer to rangecoder? final RangeCoder rangeCoder = new RangeCoder(); - int last = 0; for (int i = 0; i < inSize; i++) { byteModelList.get(last).modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); last = inBuffer.get(i) & 0xFF; } rangeCoder.rangeEncodeEnd(outBuffer); - - // TODO: should we set littleEndian true somehwere? outBuffer.limit(outBuffer.position()); outBuffer.rewind(); } @@ -180,8 +172,6 @@ private void compressRLEOrder0( } outBuffer.put((byte) maxSymbols); final RangeCoder rangeCoder = new RangeCoder(); - - int i = 0; while (i < inSize) { modelLit.modelEncode(outBuffer, rangeCoder, inBuffer.get(i) & 0xFF); @@ -230,8 +220,6 @@ private void compressRLEOrder1( } outBuffer.put((byte) maxSymbols); final RangeCoder rangeCoder = new RangeCoder(); - - int i = 0; int last = 0; while (i < inSize) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java index b017aa9e54..7759f8c853 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeParams.java @@ -30,7 +30,7 @@ public static RangeParams.ORDER fromInt(final int orderValue) { } } - public RangeParams(int formatFlags) { + public RangeParams(final int formatFlags) { this.formatFlags = formatFlags; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index dcb81c8d5f..9cf18cae13 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -82,11 +82,11 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { outBuffer = CompressionUtils.slice(inBuffer); - + outBuffer.limit(uncompressedSize); // While resetting the position to the end is not strictly necessary, // it is being done for the sake of completeness and // to meet the requirements of the tests that verify the boundary conditions. - inBuffer.position(inBuffer.limit()); + inBuffer.position(inBuffer.position()+uncompressedSize); } else { outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); diff --git a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index 71b7b39555..eaee961bf5 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -84,17 +84,14 @@ private static final String getUncompressedFileName(final String compressedFileN } } - protected static final int getParamsFormatFlags(final Path compressedInteropPath){ - // Returns formatFlags from compressed file path - final String compressedFileName = compressedInteropPath.getFileName().toString(); - final int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { - return Integer.parseInt(compressedFileName.substring(lastDotIndex + 1)); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } + // return a list of all raw test files in the htscodecs/tests/dat directory + protected static final List getInteropRawTestFiles() throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), + path -> (Files.isRegularFile(path)) && !Files.isHidden(path)) + .forEach(path -> paths.add(path)); + return paths; } } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index 9c1abafc29..3b5358075c 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -51,7 +51,7 @@ public Object[][] get4x8RoundTripTestCases() throws IOException { RANSParams.ORDER.ZERO, RANSParams.ORDER.ONE); final List testCases = new ArrayList<>(); - getInteropRawTestFiles() + CRAMInteropTestUtils.getInteropRawTestFiles() .forEach(path -> rans4x8ParamsOrderList.stream().map(rans4x8ParamsOrder -> new Object[]{ path, @@ -84,7 +84,7 @@ public Object[][] getNx16RoundTripTestCases() throws IOException { RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); final List testCases = new ArrayList<>(); - getInteropRawTestFiles() + CRAMInteropTestUtils.getInteropRawTestFiles() .forEach(path -> ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ path, @@ -100,15 +100,13 @@ public Object[][] get4x8DecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // RANS decoder final List testCases = new ArrayList<>(); for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { Object[] objects = new Object[]{ path, CRAMInteropTestUtils.getUnCompressedFilePath(path), - new RANS4x8Encode(), - new RANS4x8Decode(), - new RANS4x8Params(RANSParams.ORDER.fromInt(CRAMInteropTestUtils.getParamsFormatFlags(path))) + new RANS4x8Decode() }; testCases.add(objects); } @@ -120,15 +118,13 @@ public Object[][] getNx16DecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // RANS decoder final List testCases = new ArrayList<>(); for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { Object[] objects = new Object[]{ path, CRAMInteropTestUtils.getUnCompressedFilePath(path), - new RANSNx16Encode(), - new RANSNx16Decode(), - new RANSNx16Params(CRAMInteropTestUtils.getParamsFormatFlags(path)) + new RANSNx16Decode() }; testCases.add(objects); } @@ -139,7 +135,7 @@ public Object[][] getNx16DecodeOnlyTestCases() throws IOException { public Object[][] getRoundTripTestCases() throws IOException { // params: - // compressed testfile path, uncompressed testfile path, + // uncompressed testfile path, // RANS encoder, RANS decoder, RANS params return Stream.concat(Arrays.stream(get4x8RoundTripTestCases()), Arrays.stream(getNx16RoundTripTestCases())) .toArray(Object[][]::new); @@ -150,7 +146,7 @@ public Object[][] getDecodeOnlyTestCases() throws IOException { // params: // compressed testfile path, uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // RANS decoder return Stream.concat(Arrays.stream(get4x8DecodeOnlyTestCases()), Arrays.stream(getNx16DecodeOnlyTestCases())) .toArray(Object[][]::new); } @@ -158,7 +154,7 @@ public Object[][] getDecodeOnlyTestCases() throws IOException { @Test(description = "Test if CRAM Interop Test Data is available") public void testHtsCodecsCorpusIsAvailable() { if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { - throw new SkipException(String.format("RANS Interop Test Data is not available at %s", + throw new SkipException(String.format("CRAM Interop Test Data is not available at %s", CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH)); } } @@ -198,9 +194,7 @@ public void testRANSRoundTrip( public void testDecodeOnly( final Path compressedFilePath, final Path uncompressedInteropPath, - final RANSEncode unusedRansEncode, - final RANSDecode ransDecode, - final RANSParams unusedRansParams) throws IOException { + final RANSDecode ransDecode) throws IOException { try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { @@ -222,14 +216,4 @@ public void testDecodeOnly( } } - // return a list of all raw test files in the htscodecs/tests/dat directory - private List getInteropRawTestFiles() throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), - path -> (Files.isRegularFile(path)) && !Files.isHidden(path)) - .forEach(path -> paths.add(path)); - return paths; - } - } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index 1f9547e744..72a88da1fc 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -17,25 +17,57 @@ import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; public class RangeInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANGE_DIR = "arith"; + // enumerates the different flag combinations @DataProvider(name = "roundTripTestCases") public Object[][] getRoundTripTestCases() throws IOException { // params: - // compressed testfile path, uncompressed testfile path, + // uncompressed testfile path, // Range encoder, Range decoder, Range params + final List rangeParamsFormatFlagList = Arrays.asList( + 0x00, + RangeParams.ORDER_FLAG_MASK, + RangeParams.RLE_FLAG_MASK, + RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.CAT_FLAG_MASK, + RangeParams.CAT_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.PACK_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams. ORDER_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK, + RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, + RangeParams.EXT_FLAG_MASK, + RangeParams.EXT_FLAG_MASK | RangeParams.PACK_FLAG_MASK); + final List testCases = new ArrayList<>(); + CRAMInteropTestUtils.getInteropRawTestFiles() + .forEach(path -> + rangeParamsFormatFlagList.stream().map(rangeParamsFormatFlag -> new Object[]{ + path, + new RangeEncode(), + new RangeDecode(), + new RangeParams(rangeParamsFormatFlag) + }).forEach(testCases::add)); + return testCases.toArray(new Object[][]{}); + } + + // uses the available compressed interop test files + @DataProvider(name = "decodeOnlyTestCases") + public Object[][] getDecodeOnlyTestCases() throws IOException { + + // params: + // compressed testfile path, uncompressed testfile path, + // Range decoder final List testCases = new ArrayList<>(); for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) { Object[] objects = new Object[]{ path, CRAMInteropTestUtils.getUnCompressedFilePath(path), - new RangeEncode(), - new RangeDecode(), - new RangeParams(CRAMInteropTestUtils.getParamsFormatFlags(path)) + new RangeDecode() }; testCases.add(objects); } @@ -55,7 +87,6 @@ public void testHtsCodecsCorpusIsAvailable() { dataProvider = "roundTripTestCases", description = "Roundtrip using htsjdk Range Codec. Compare the output with the original file" ) public void testRangeRoundTrip( - final Path unusedCompressedFilePath, final Path uncompressedFilePath, final RangeEncode rangeEncode, final RangeDecode rangeDecode, @@ -79,14 +110,12 @@ public void testRangeRoundTrip( @Test ( dependsOnMethods = "testHtsCodecsCorpusIsAvailable", - dataProvider = "roundTripTestCases", + dataProvider = "decodeOnlyTestCases", description = "Uncompress the existing compressed file using htsjdk Range codec and compare it with the original file.") public void testDecodeOnly( final Path compressedFilePath, final Path uncompressedInteropPath, - final RangeEncode unusedRangeEncode, - final RangeDecode rangeDecode, - final RangeParams unusedRangeParams) throws IOException { + final RangeDecode rangeDecode) throws IOException { try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { diff --git a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java index 1c3fb865dd..37a9081574 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/range/RangeTest.java @@ -2,6 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.util.TestUtil; import htsjdk.utils.TestNGUtils; import org.testng.Assert; @@ -143,7 +144,7 @@ public void testRoundTrip(final RangeEncode rangeEncode, final RangeDecode rangeDecode, final RangeParams rangeParams, final TestDataEnvelope td) { - rangeRoundTrip(rangeEncode, rangeDecode, rangeParams, ByteBuffer.wrap(td.testArray)); + rangeRoundTrip(rangeEncode, rangeDecode, rangeParams, CompressionUtils.wrap(td.testArray)); } @Test(dataProvider = "allRangeCodecsAndDataForTinySmallLarge") @@ -154,7 +155,7 @@ public void testRoundTripTinySmallLarge( final TestDataEnvelope td, final Integer lowerLimit, final Integer upperLimit){ - final ByteBuffer in = ByteBuffer.wrap(td.testArray); + final ByteBuffer in = CompressionUtils.wrap(td.testArray); for (int size = lowerLimit; size < upperLimit; size++) { in.position(0); in.limit(size); @@ -174,7 +175,7 @@ public void testRangeEncodeStripe( // When td is not Empty, Encoding with Stripe Flag should throw an Exception // as Encode Stripe is not implemented - final ByteBuffer compressed = rangeEncode.compress(ByteBuffer.wrap(td.testArray), params); + final ByteBuffer compressed = rangeEncode.compress(CompressionUtils.wrap(td.testArray), params); } // testRangeBuffersMeetBoundaryExpectations

x z@H)28&?nz#t_v_~e(<`BD2ACL501^6P82(}!p@Kp;DaTXOU0q>@`R;`okRvP(Re4m zM+a3@;h@7CE$Jm9lsp;ZR_$+tF%~~Z37^95a1p(&$tOWqMm>Xv*piTJU$rtPa2~>i z1&TyV=`1?MPXFB_mtPJf7Q_hmnQOR@-HH}z&-5+M+T^%A3x2{8bww$5keE|>HV?b*Q~tfAZI5^ur`0_#5%Ap5*5s~ z^}A-VNYT|)RzF&^I+r~6M5!u_6?H1L2&3+L(a)L)NHr1xn zF1^d)*Dl4hlm2b&Ydks$DmfZxTsQhs5p=A$s*?WSX9)tn=-KLiUQFc9fO0$zPdObg z-CM}k<%NsRI9U>3+$gxAilA1`o@)kARk$f{TUj>7hUjrQq zMF(W@@-<4@NcCTM8#d-42cD+r;PAt{nugc_q7N3crYcf6Vt(1u=FuO}dcFT`tkBh^ zqzbM@=5-408d{1#*kSvF9jhm9Ls)cwqcdQxxlAkz|67Ec&-eFpKY8|43yg4zd?d| zmfuByQQIBGGT#41k>~x}kIZ_*`(M6t62wQ>Rb2PAessB;&rEPI5s%8sy$L)nNzq(-mQk*9RGAJZek#vlxr?a`mOInw@liux zWvG8?do`@YA5WUc>SLgpNO)x-5C89gl$AMo2z*$=d1P!hx@+`)jc#3qd>lu*$0tTiqw`-aG)d~mz{PyTwF7WcXu3`dnpkC?p{NO5s?J3 zGh|>y1PX_XpH`%tEz(@O2;=$c zgTQnskE+%p9yxnW<)tI%oFa~*4uz!wR5nZ^OAlJ=Uioo9@sM z5S#fD5`7T58@VomT;bL6o;@-fg|*A5pI23>28@mamANfM<^|ugM~-P9Q&L z6O4Ug6>4e15a)P6BcoeIZR=3uqi**giKgvO?R7PRg}9iCH8pW!VEpk>3NZr=2^jkb z%P-Db9;T;s0zTF+Icy$>)#DVT>Pk{v)Ym<(%vi}AoTd##uU>{5HtT2NxLy-3rKf4d z1`Uu6@UIKLs;}*yrv*=lXKT1}Lbk$rc-`05WoeUPDyMM1QL z;Tokh3*Cgu@0Cra(umtQg-T97Ejd-!)_-;(VngWmmiyP14U*M|EhS7Ok<2wRx!z?h z$pgkvYuO1(x~eaml(Ibgco&wBb@?`mn3~9*RYmN_=%@RMJ$I1*@3<_LpvTrxoQ)GTBFjW}L=& zlm;X<`Uft)+P}@d9mHYA`S?GdT)QH8vR&bd{Ud>`@yD=Sio`1}3onn%k&HZ4giG^U z(6^g;i#TCW*6VyhSad&QovbTjsQ$V%P~MJ&M6FY1Lzlj|{a*{}v&_gZTpRk~{?Mr0 z%|75_NQ;c|o)e`LyNsuS3%u5*(-E3mMY3k4R^*VAZYO}2^*=OHN-;NEDSj#HI`V|`l;1O&8 zBSW$DKzoP4nsY0K+&_&@A#!la#}w6magN0P0CX?5vGeDTONX!jy@saw?GEjeV1bwVLhr-&m_Se6p%*}Bh1!r$~Qp3J!7{ioGuMhq2_S8eSlEb z-w}DVF~#6OfR+GdqOL0KvyM=c(T5ETR5cHUoKDFbHO-Z~!-Qy14TiRMVJErVUK0L0 ze1mABU)_q-;dl$-QKmf-@s^*Nr?fkwh7W>WX$2Y&2Wr>ui&qqhkF0Nk z@^=#d_`qFBqRkkPU+dnf(}^-MvM01hx)R4zCVM&Vrihg(nKn80f>ga&n3gS*udx86 zpi=sSqwlRCn)3mr7yWAFfw-e@fkpmoGmE^%fRm$-88ai28xq5^sAF8QMf;{)-A`HV zUw5O0K9rSV3}WT6MzK=5Si~o0_tDhQ;2kNkZz0L5Z02Zd5Tp|fjO$Z<9&*$8JP65_i_aI zbaV-5B^H>YXcbQnuG1lFhK89Ixpy<#5VZmJsou$Js{9Ui4N;dTiCywz^Nr=MY+*+^ z(T5*1BIO-qM77zdxc7iE+Uux%VKpw(*H|^btQC^m?W$4U?JF|p+lsf1_pymKIaW#K za$QN%4Zz#5o6HO#vx$~wm+HOcSiz(FbuU3OY~bYhjeO)*0&>eZc?}86t!3VGSAzt=%}b*bmQCEMKsN^6m9N8 z;Yt@dk+WJ%z>Domyaea+Se(h$eEKn;B`Mi!{kQtUV=rMUj1oy)j7;8J9UZ-Y*R21U zYsL_nl%$LrqDljAg~1t?*m zx!!D{#}UNZg3C~&`V=n;rWUeMC7RIyIr9_y3jz#yOVSmm=z*R^xS;amPVZknpFX>c z!m=dvhvvqvB>Sx6__#2spX2e0w@9QObI_0IY&rp}Ss6u{USZWBEefLDv@Lxdu(mVl z@AN{dH``cYUlxVO)%;r|lI#pQH=9%sOBm_t^Wh6onH-*Pt=`!14>Yp=VaWpWHty6| z#*AIEAoUEIlkDXb?x9A2tN3Cp;jpveEG(zH-hbzX#_9*WJi8rJHIr{|b*!b%@He86 zLs21Py+?Uqz~ZFH;EgR`*u$3ytx^PSZat2>11Be20|wwIuL02wbu=t`j4daw=#I#XZB6Azrya0xZy*3R4skIjyxuQ|g>}3}wkREU1xP|f1iA{e z8}l|7Vw*+pAYTO+<5V(A4SqF8k2df}yWEF;p2bcrU09@WM6>pEF+p=`P{A(j-hdW#G(aRfY0mcHQh*~@ z){x}v#&tH>*2g_tiQZ#E59RR4S-RNz_0QLz39S=r!uZP|mpru)HR zz}ng5yaa_fv0D<85IC+ZY(Npd@m5SZ!^L%fiNF!(mBIg-lgrpWM+C7hR zI^rIS5QEK1+~~<-avCP({Y*QZqP%x$mzx-#8q5xUc?f^L@lPt1r43wY8FSfjRx0e8 znaHq`82+6k%+P$Yw51}T(R$7?_&a_ME4wmhwNj)G(0-8~-jjV63oi4lg;3S+=Z5w) zgSvZTD(MlOD2?eys82gr|Ir&xRrb0|IJU=Uc9iJ#2i*dFSPHT)0<0v@?31dm5E}@&o;AT9oK|tEV|}Hi7s@Q)S{;KG`U{l%MBK8iC^FE#2yz0gP!s zgp|3+gmk+RU(b*|e`oGA2?NpJ==HPwXnIE*`VC$pie0*PT2(=Vr(L#sIGC9y<6 z$XTTn8U8UixhAl>K>u{do^scPKQRiaHR_JB3v3?!h88eRk-Uy-)vuZPP=f%%88o(% zr(=(#chwJQMJyv5N&Pc5U*n$QW;vN41iI1dlJ{<#8EI}7XpSX{+_u=bAbf$hOz9~& zZWvN6&~kn~<>5gDSkdup@S;B+afhr)jZT+coopj*X}rFnIH524si7(#oO70_GkhBH z99XVNU*>^EZ{HXND(#>u2%__wPV|4~KOWI7*_@yHXhet|ehld0Ms5_KoZfJ+3CZhT z3_8d-EXQy81tWf%2$L8AsY@if=yM zs005cc2tY0F<}(^aUfYSec!8^$v9 z^DT1(mf6xGuK;v75ryZcMKzXhpA?_CfmIU6qeGd%K>D3zWoQ$ZHHq&;<${=^u=JFT zc`nvG2K3$|5fR%2}F!9|#0blB>sib0A~0R#1NfW$`-n6V(7(0w(-6XqB;J zW%C68kGEn>+wU5#MO!+34j*H&8kgjv)Em)`h5=2 zyh9=|7$Ndt@9>%73gmC}jnpbTq(}b-uoAhua5lexcOltyi=N1P%=|Ej*2_3rv>R7= zj;pi+_%>q>LdK%NBF)>kdp;;s(auWV8v<>XkLby{1$@cS;UzOS{=KfEb59%VA8sUd z^hx1}*r!Q3^%m7$G!!0cGC-I>)^fp?lHTrN&iBi}j^fO=miAKGhxmFyuhJ+YhW~Qq z4Az8^1wz+qDVQ_hth_X!Z#}V|)!J6&JS9Ft@-pLqPTTS@b3jG7w;0oz>rs~k(|)Cc z{C9ucc(JJ(k8M}Yf^?xzJo+EQqwSvw?z)NqMKe!g-X?NOy=f6PHh&x)Z;p zuA`plvWOwiE)Qu%*!OXN^YWeI1tFac4rZX8wRg0&0?cJ(B~y)n9UgO10H=h|M07gB z`j(tjFDt0}V~i&X!w1U!8)|e+AYU<^at8H-5gBu%CRt%fr>Z|4Ze_BOw==&ZS42`K zfBkGi*I{chq&b^tZ_xGz$SdqFMokWcEiNh&g#dDhV@|*2FA9J7@-I^`>Shc+a;q>? z1c3Qtd7$qvai#mVho%Drs23^u8qdZ`kW+O)m>SY%l!oeYDc;+1Vu;oY*#({z_r;b% z;^12tDNeF*jiJ@FD3r|48QBNDyKy`x0h`xIGR@vL@3$jZ3!h2Nsh9*#zZj$z`|h{$YplhzsYk^WdL zR98ITqF+o6ckNZ>-TtpgfH6R-cib}iPNk?BhRj8G1HE$!1|Q!M*`A^{eK0qdx=sOX1cpR`ysY!!!~TJR)!{F$x^#b4siee1rqj+ zxN&8UX#Ig>&~W2B(ehWR>d$P$u*(io1ED}p^=?c{$;P2QxWFZDxb>KF$BaId_i z!Yl+G%meT^Cko7zdMI?VBKQtBd07_AZ*fDkORX9Iuy-~q`n8`bV0HNyGVpA(Q|_pn zDpsUsvFG${6yctEe8_Imk!br8*@}e&^pnfk9=ZLVUiG zJP%g(6~50fbXrf7jlj3s27xWU5T06n6suPcfHH(IF3BN6kIQkJ>VcG!SUlh!onD;e z7Rb#+>Di||PB6#s$zROkEi0ZNX2cP1YL4^40Fn!nK9RfOP`(s5%@gd0?tY!d%OAYh z;yCn&a#?&BJ;90xullKZl$5p*Jr#r5-|P)`)w(d6!+kPRl*e!%F~-jz17%0p7L;t^ z3>*&ScPdqAEFSGh05=dkxg1B#Z2QQ6Z0T=2XOzO2tI2&ClbA4vcrb6)KAgB7P{`4_ zT=v(n(?}ii#iU!-IvK5fKCSK9yO^bdIZzWx2Yvj~)5^5llxTQ0Xmjn;81`L`hs;%& z@3)rzZwOOthJ&cD`XXF2=2%N3j??VbX0YK()TAsMg2xqHbS78JC*^yb$mApixVcBe z%=1qoBxj@^2WD0lHw+5G%Tx1X#!ITX>j|V%q=3);(9$NY^_hn$CT0tJT+0-*i0%>T zKsv33LySkvDJmbZ*4aHo_vcb3P}?Sqa|)5Ar*AkFC^41rGY9rfD}GJ3vq#+?mHI|h zW$J-6th9iD_0$XJ0&ZX3dU#j_YULdsQI(YJVz?5(>Rw{z@owaZ+i~pp%>nN(Qdnv< zOP!z3-#~aQQF`v0D~02cavf!{JAk0weVzlUJo|2m;eH6k^anj7%n`||fHH;`; z-e5}jx5aZu@fxei4q} zk>fF_A-$fa2Or_NpKhCtCW{Gf!oW=lvOb{g1P6+0%QbdY;;lb2`dX)BWw{9;ZuQSx z%3obtRGu^DKC?fMsi$EGDVc$YF3RpDg7^@6S3p^+`ybx$wp*_X!FQj^`CAQ`+bPC! zf(u%JC+KeLg=lm6vj9s0*|6<}*UpPpoG8RO&Ti}C?J@!Cq5_~QOdsD-@vonBTqiu| z!TTVS%OgcZ^0h;p-|80<4yhxLFKb>|Fb;Db*C{$;MBB2>r(aY0spmiV5%_VaA{I!c zVTTiGJu0ARs>_#0#$=Cr=&Esx6P026@?Y$l-;!?lx(lD)%`kE7OVURJh-l3iQ!ktvw6#F~n@`iKmkMhF#UMuvy!OzmuzI z_{w~v!+2FY%l2ufj{(^CPLzQ$RK_u(RWZy08hUBKm3x7Ew*W0^nLI#IV}#&9%E@{T03_Y~=8CR=}lnqTu=X7kkI0)k1CVI7@o~uzUMFz&GrX z;R=RDnw&AWz=HKtOh?31%GezDn_Ekq#&WaKc=3}y;6nj*PTK|-n2bA%#-H$cBQUz& z_&YTnBD+vyY^`E+RAZwT7HI`zt;a=iswKke01JOFTKq4fp~3W?xCO0YrcIce6xo7y z-@!8IK!aL1kfxK4;l3_#zKkVgb>Wm?QUp6K5EtzUgVm$flv-0VBclFTWgwvxvB?vdASzbT4{zbi%Sglt&FmDByTYo8&02zcCu zW~971AsO;Y5M3v#+caymLjGgUSqo72ttYJAAqnajp^>xRC}L~k%QK2c(08nO)=L1H zyP>H>1f(I$cg-kaRRm^vT_X&H31#>{R$*hwaq`J5UHw$K-!keI^Vkd4sc!!MwWL~6; zUwXPKU;J@Ehpr(+p0XI-kM@ZQamLaN?zy%3d6iiwxqpc6_{TJ=NkX7>d_jCkfi>Q| zI9<_w z%F={#ko(b$gitZ>=_2?B=#GFGgchRRbmCFL$kf`(O~`#O70HgheYlx64jqL7fX4@m z!{z1g@+<VIW+)!yY6WVN#En-7>$vcGjZuOBe{Z0^1ZY>P!U-X47@z= zV+#v8>zm66XeeGD^Z4SkDQWJkVInXK)3&3W)>9lH_o0sb(y6>^(lL1uaiUxKKUeYK zCA}(1ZB`_?2uL;s_tQ;Pr*}fxaZ4oT`LrGueL$E;P)69VDdJmdoYU~Il2*P>$4iH{ zLKe9St#D<9tZ0dk-X%4e50~Lrkq2UEZj3Tp{bjdz&~c*k4fQi)e;F)xmPNF*b1E#>RlfsF~D0bh$c8Ds|B0_-(a&AK{S(|)LrJaFLt z@t+Q`D&%MWE=i8H{Bi>F=SQZ)Z(*>q4*SvF`^Qar2KQ^M zkD-lzGPBci`BC@i{lH!uDmJWqno+UE$W^!NqvHs)TA50jksW+?{vtDMrevpkA1i;r zo_F2zTEY>x3>||QPu@V>2pxLb9t!pMqUm-}h>iFa=wl@l{czA?L#*e>0X=2lM~%@2 zBVL;UfFk79Y#vWzB1n~jeU`BC8Ar=k`f7ivBW@LgekPa=Zz%o{agz%Gv`btqjkMu3 zUi^QdlQ2$(?JM9*ahVX+;$|^^ce7Df#L0bOsG{68lIzM{vI*)&E@)3e?qQX%19n&u z^QcJOxYmpVXR7xGaQ$pB-k(%U_`CoyK+eC&MY$K&PuR{s65>a)7vOI^OuE>%lo#+b zP@$Z)Fy-t~h4S|A?GD=u_+P|zVTAkg!lQ;m>#IN};$GJip%1#Np;_GTK+MUzErdo} z&RoHQAGr9;6eD!70`95iz}LBH{0|@uNZ^|>^Y77d+NeDLUPTS@xmT?!jP&wX-o zGL_C7|2On=Ihfh`AiU?xH>P$e&|c+J#_X+4MCQ`JQKYsw)Py_ccwwT5=d}|WNu~`r z@}%CfDBH&)3YCl{FZneI8v;hd+7W44L}WZGT34|T3$S_DeBVfgnr!1-1>LOAXD?t6 zs1|h~Tz0OfF#`@0-$bvseQwY6*AiE<|HsY}AJ14g&i@(DH5QZS9kfKIj);;j#%kTB zvyuwFO22!>ufuRWhsk2+GDIFvFGu(}S@)W%)JO}{YR`qAUrUBAOR7s_=Fv2w+I+gL zv4x{}HqmF5B(GzloM(r!>!r17RAtFT2rk+GFwNCN?Yf#PjAA?0A6YA8YbOUjf*C2* z{QEEtZtfZ=5ps9&yqH2p(g)4dzISMH_Jtm24D__P5`wqa;{dK(2raIQ0S2N-!vSw5 z;cM7i0w@?Pcc+Avv6-aV;dE%#N0Q^ece8irZ+~T8Ai#Q49=xo4&$Q|8bh_&Yeu9)9 z_EZ86v9chVzhSZGw43$$0CM?*5B<+75mz(PzB4OfcCkL-TYy+fJWkRrjXTQSR$zV< za8hqG)XlewFIzxoy=tBhIB>E?-{Mz7ya8=G1dSmAW|b>Q<0=PF&V4kt^j9CpTu6k* zbNZ|){OW%k23b{M-aQvimJah-Cz}=ruT|$e<{s?MpLMJ$ZK*YQ%q?@XO!8$wKuU z$(Kxq7;SWA{Bm?{Lo2hDksT^bj!vwq1u~;wF7KBS9d={(+qkfk4T0cfjv@Pk;#Pl8 z^m1!#rKc|jmt2ViovUgNM^)9juGPZo?d%0IVB&sdYpe${lRvl01i?nPb+UN9I0Cpl z76o;kpi4+lm!n&f<~zj4YO|H=yfs_PH&b%5&Vt*u>G`$0S`E!i!t` zrHkBIX-n)!sdkro~e0CtfRd zCu+HpT0c*!1##!fekOo&Gx??+wi3TY<)^biJmCp+bBX(QpJC$S&%N_JErjq7WX%}OfQto0 zETte=lq_-eN?;81Tbad|44f3by&yzj(DRFs0zqZ`6hhbs1 zaw;;1cV?CLYbR&$HGDXX%>{ow+1yNs9h-4b$1mOb=(EQixsEI84sT(1l_qpnm_n}T z3z13hB#B|cBVx1b$`ZdR-d?V^_nTP4^gX(UDrN8Y` z6KPd-0P~Lm?|~INUBXQTH+@p3?yL`Azfw_YB<+N!x#k(q#{i)tf@y)2^Y6`eGU?}a zt7dmFJ99(L48WKju?z4|V&#<)-tdxR1Ilyazt#IshObWOF7=583Ztf4xE-)F6XncQ zsfVGyfT37uCT01?^><66<;~r)(-y{qdBgPKDbJLMDjG*$E)P%!;LpubK@)vrL!XJ+7kExD013m>W8pJ7@tSN9*M5U2SIyK2=+y{4CZV{Ul9KRPk!2h zQtzlE?v(#sEQvq71JN7D3`Bdix~pWRP&*GQwJ;=T1KNdTK_-nw%1Np!=v%*Uc9u$d zS83@K9Wo(OUP|~`FKjAEB!x<5#(j>6GbO2V>cKsBu{&F_JMj&sbh)f6d_63h{#JNe z#ZILK@b?sbRW{WQXoiudZp*`QD(_+qnKscS7rI3BoRqI%_WO$>*Z}5RQF`>4zNqVV zA#inyvLY1_zWe0vujS`-i$e@1@@yqyX1r58R`I**QhW0P3?J!=&1g>v2N#NH89Jde;J@FK3WBqeZQu;!@>P+o23m#50p?sgmhALK^`RPFQEfFpEm zjzs2mrArUch`u?KSE*9)tH#4ae7>!0 zEkm8My&ZX(Hcz*0r&|-ALBZo0uFFXUYQHa?h)L9UnEnoCsxCd=4L z1bVNj_3#i_f}9`8BnbMX%S7?;xczJteW-sKZ-7=gw&j`cSh!OW7D@_LS-u2tN9o(> zE!v+>>gITah_7;C>oel%pEp`m>b(?i-lZ#dp_QuH_9z9UirWj1Y^L;28z{0KA)zgc zPDc9*&XJHGiIhes2X9AKKmMCLu?T^~tNRGL&k|le8`*f&II02lcXsXmyQ3l~9FbY8XQnETjyZ!B(24~Tdl0sgm`)Oo5E z6i1CN0|&cvd`>P(nQq`u3Xsx@)*z@F6cve{G|4at`kZ{c(y|*Qco^Zf8G* z!^zuLH$%S{Rx#DM-#tth*SL172ZFwK<*K^{E~DaQTxa+1hPgK>hP;Bm{9F&f2l_70 zOWc$GWh~-V0&g&ej(ALZDbSDU6$LM~NLESGWvd}}YbFubvylS&r24W)qK=wEO7t&q z_S{B4N=F~+F~L1HhjMGK@SyxE@%kbYGIRt5Mi#p9m}gZ|zOK@kdL zPnJjZQ+NCK7@fKB$I+$xB$rwXB5nr+OdXeKu=WoC>P0#mgMN}+C(A1CfK#Rg1P~B2 z%X#R#Qk!d$&o|zN*U8%;?CiPmgNG+C4BQaba~$faoXE5!r@}k`N(1-vs#Nx#$$LY9 z*y*&3D2dO4$Yg|53+I1%g#n8OVd-N%G(9YscExl0Qc(_3%@Yr(FlyBgC}tC%&o;na z>7;L2pLf#>?I$)igAw;%GEp3~a=A=J8a(5jKy4_%_M#BUmp$xAfSSrtf zj~DahQqZ!{bGYU_aK5%b<^G;u&1Lle)!%UY1~@Lz#rvSt{$@w4OhLQ6&+JixfdtVI z)ol^aOB0RV{5Qlv;9TmxIq#P{?WmGW8Eqt+1M+B}e9BaVV_}*W3 z)C?U_EZxZ;EsyM}-l`Meb!!irmnJ|ToTZP9qhpI9aG2XdvWrx`YLqEXFxWUAKx2*) z^?Ch5<&< zXW$z z89+|y7f|WG^|M4PB9;j{Vf`$YBN(PqRvvt=cMER=ipclO(+*hLracJ5Zc6$$+HXva z#{pZb=8qYzpG<&4iEgv&U-iL}WI=H^X0Lu(5Q*N|^Znqm?ASPv>gJg0_jMA&f+~g# z`aPy6c}sXaaH;w$mIR>tXrU`i$X`fqb|POh;T5-wn!GO`u;dFAg=LX)uBYlX%@!u3 zo0|57PqC-`Ilhf>40zmA7(NK%J#JVz0I7fXI##Lf|6#)*+#HNmJjxx=h>TWS>JC+_ zJtlgi*?BY(zSFVPa^Wmn=$OSOG%`g4hP+c_%>t~=#41=-c& z(}~wu?=}l+XqIcO3b=C=IICIc@IXwGYu9#5uUPLnHbh|r8JyLH3q1CokYH5D52!4o zJ)OZDY9#&eAy|1aH##<&@tEkW-Y*EBlIqcqo~WQ~aI&^nLiM&gd?+Z%a;|}YjKtr6 zpl0((paownhoH3@uci}2jup)rPB}S6K3+vaD=dldEPmo_1DtJS`NdBK54m5l zpFk7&g5ROsvG@yR4tG-X>}d4|SN^6$BHtuxcmIoV!g73dF4VjgF|0p*B;`i<$f{$r zOtrFIw5OvfkqxifFenl$gyIykXoHij9v){jKGmmaN!KGQV*$M7GD)QDm15=)oiuS_ znu)(F@N4?$up?!{-HWaSozo?w>H(Q37L#;A7q@lv!#Fm>HW2r(P7g*GHjJ&n+yMfU zz`_0ub(1Fa_Kv>fm{(|Kx&!g(`LeurgGk~;sjl#KyCNh7BH#>G`I#n}7;=U(q+B{~ zv#?dNK=!3Bu1EQNVXwJWw;r~JpjxU9-M@UqJO`e|*Eo1QM^^pBN5sHMEX1tQ8rGYW z&?><$WEFm_o%2fhvy8W9T0OypZ~jM%wl`fy;7WC0uO>cBztqqr{PwDVO{iFVJ{a{* z9Oe9N>8w)=0QdXu>086h&0izM;J@xrsga(%mVe%r9`!#$;Kd^a^d7cK$$`FA=hauk zA!cENDoNYH=)kXCZA7=x!gM11Lem{{()n z{$tdu4ZsaRM7QPabtgj`DC2LAIZf)OBH(NAM7D_^3aVv=+apAa;B6CbWZmV$@A9kO zkn@x42GV5ulo6cCJq0=A>E2Vutaf;mmCtEjkQN-#L)qfK3cPa7 zpM<^3OJsVX=g}Ksq|Me+8H3d+!K7Z z=EJ(L>q^sMB!&9kGr7?He`&B?i&x#ieShL$2B_L>nrY5@%BcCa_WGNz!v(n_mV0Gi z7Gm2Q`X_j(!xl)Z_q(I|@|muQOt@zB!Ru1+0-qvzjOk1d575TUS%tniew-C(FYRW? zK$4}o7$v*+BHHJkY=7?0Qxj7lI2f_er#Ihf_yF_trN_uNgE+b2Is8mf}-2#-pO z=}V_Q5dws#bFP7!opR%&jgob0(y6k|ltq2fFnZdV#OOrJ+7vMn*OtGdmkL?Q7k z4^f*C-T`f1)6}wr#0rHP5(lnsxBv~48m;t6aw4omKEE+?ynZb|_|XTTHn2B(rB$de znO_uP-iBuxG>BvcuyCzKmYU^qQvX?IIuy#Kp(?HzW!3~il)(q{v0b*}#t_ASVunKm$M)F;s-a<1uR?G-! zEN&-YuG^pbI1f79DZ#IrWH+3CFRlzv#PCYL+?O^VI0{?X{svMQSb**IsRyFtm9QEaa3*BsR0sd*7Eu7!`;eVgs^_Uqg2llt%;W$l7E`{ z`yo?L`giu;+zBD zDlu?P^P4#bW<6|yS?AIhS7Za)adeVj{}ENCRuwX@>Jx~Vf6x!n{4R)`eDbx2dVt@% zg==yYAy)@>H%#NLw;Lwqu@6!AM;mwI0y$FF&l(>C7~*`0A9EDg7j|;BpxETrS=gF8 zm&o!U?`3xqP{pj#vj;Y;x6;j5@C^2J?#J8pA#8T*BzXMwCsw--SCEqy?3_(O;^Uxz zaSanfiu{$2r1lkWThS2S5}X#I>h-&7$%LDqEGcEvXRx8M4e$MwCDqy`VqYfFoC6H5 z`m)^6d88{h_JKC2t3eN_CGTr(RTdG=8dpV~j2Zli3is@ODDOzuQMM!E17T8}erXgA zF~j?R!WOfQD->VAXM3%_s5!wq#*BFN-ry+t!QC*2(h?sMGDG60g%RCZN5^)LZ9Rln z&*bKE`a@f9$qfC2RVsqYKWBxW9izIM-^#_nwaf1~35q4UOb?u@PIW(~j4+|NNk+9| z-QwjBrLnrx|9&%?2?aqq1kXz|)dm^IA&`XK=;L4m<9iD44Aa(I(agzlAJlg}=de&1 z{%c_6%9g6XAfvM0J_cX?s16`+$v%$Kd4#PUrg>y3b_W-}OoCaKbOnlm@bS7ZH`9Qe zzW`$rKaKRdTHjycsyT>BcO-vZ%s>#4?N}cL1yKb%F+136u?xkvD&+O0X>7Hb$Kz5% zu`wp8=VV4hg&d_k&(Z!7H>vBN1fy_5VC1F6Jx2?)HQaZk5iQ^D{u^`P1Zavr5bA|u zh-!yKGBL)+B_b7SQbV;>Z?1uj9oJLj+1ejNM!`@MhMJpPM0a!Qg+?j`xh=mQ-;R~6 zKxO3xKzUFFvF3J->wjA-iml(-*x=3Ql zz(cZ*mO~CJma6KF?9+rdmbivQLdcy&4!01}4B=?;sbYKrgr%y)ef#lTIYK=lOI$ii zHL(e0)p|~G-!y_1oYZRD28SX*p$8qqGP>XHdZxLng>iLm^azzK`PwIp|8590?j{DV z|A|1Aw2y7TZ0oxZQc;#(eEPP%v)J_L;|+~hLwl9-vr#3sNT2;FsB@R%i37OXbsP)1=eUkwk`a)s{t_A)E>ono8uh*!m>RN0GcIR)Ek*K zUmnERaoqbv_7kD%Nz+4K+o?*)p4SO}8^@fa!Qt4?+o6E+B2b%aEaic}Pt9RrnbILt zn2hbkA>kJi;>sXt84j@oJ*MB&m^+>6z8_m#&Rewox0Z2N{*$Hubqj{6gkX>kpHxgh zGKJ9VkFtR^21uE&(!l3O+YVxjU;4i?)Kts4^lmf1g8mR~4`P!QF11WIWh+2O@pjLF zhlHKfwdR7b9P=$b#!qpM=<(vj<>04fXyr2?`To%-9xw3q^XSSNV?8LqXUYp*y?AG= z1n!&zJ7{xhr1W57LZoAD+1{#vov}aMND~BKA7@}6bEE4^BekXxabM#o0!^PyC9h7@ z1m8C!5}$MvaV2BTVxzym?IUnFJ)}b-q|ycI3FKPv)N!4@_!iSxa5>jjj(^*~sl4>U zHq!Bk(!!XKrxH~yYbE$+RYY$#jHiJh=W2*ulrW0aAzhk?Va)O$4u#Ax`+0B$(n^tp zH|oP{yRLO5{z_sPY6&^I8*hBqMT{p9Yb21?seaE(Zf;3HBLe)yxXJwmI0|JCDUgx~ zrm7%x1b3N!^H`i0Y|eAq_XptSbP8xK%Lq2ksjErr?Wz$E>dolMuM}yviR2JfELe;H zMbE(P`x1V_pl++DTr%zdNn1+Pc}7-@}%Ekz+56< zQmrH1i;FmgM0mvfH;O?s_W5#;XfNrWK45c2$r4OsR+kUxWrTG4U=VLs$?j@~bp9fD zkqB%%(3T*>)U|&IoCqD#PwmZo2QX8!4XNEAIq*Lm+e-D1Lz`O<2b7q4m!2kRUDY!7 z_B}Y#$p}GX!-%Q0*d*PH`_;v%^Tk`nzsX+F2L8f+GsO^|7h(fv7aydEgfeoN*5bNrbf?MUsts=9;?XOGfe{*;O~P`+p-itPGn zdR;=ZtqyvLY?Y}aymSW3)!F0(*&(wNho*~jh4Hqc#y2zig8{I~lsH)~ggjt0zQ(q= zBl4kZ6x6*$<-#-rND*m(9Sxuf^D!+ZZNmS4F;jWjUY!HW7-km$sQeS<^Iga729r7n z8+li{RlGov&Dd=uYw}KyF3x4;MWg#jX*7oH!9}OG`fS~dIe(!p0FTP6|NUmjZDZFQ zp&K`Doqq%^IED3Im9=!yXO?M$RmUMizbPIz*f$B>)i1iW5o~b=av6(kVTxXJ*7~nL z`(Afa&GVF$Og}^;3k-t4bgSr0i4-wh&k$wBR zn!BfEeUI`I^aJ$>bGSF&E4gk8ciL5gKW=k% zE=HTuaN8Z`eFn-C#`$bYMF=VaZF2Sh3n|ROEGu+^VHO3Cr3(Tl-M7RmnAe7eV4;NkcVGv&@!v^cp zg?=e%7+?EsG1XieP*;S9HD?61^4PQD->IuPB-~!UD@5+qne0avbvb;)c_Ovh6R0Be z(-{70TG!wKx6NT!($%wB>96g(W6EFgsytep!(Z4P*c0H*)Wb#gsy}1N;GKDNV9RdN zc`E5)wVoD@`%>=wKWDZAdhXq$OfM#qz%B$4kiBj$KV8p*i1jy~JeZ~XZ!c<`VjH%A z!KHj*wSW5VQl3lBE$<)EpJR{{i@UE|ED&>7xW^2 z;|BYRH$GeL_P#cv#te0j>i;r^`e zu*;jkMC8=aCbe^h7&Gcoh5+A8@ibR+v0VhLyl^0tkTCDPn!olbadn1L^KN)hlEbez ztgS|qw^^iDCPH6r&ul+Jw?ali%S?F?l)YA&O*5AC0+j;x_dMK&lgPi(PDo$H)x~Z^ zO6E=K?D4I4Wb2krsoR4W^Znfv@`reBC9e?L!96ICpC@v}xLfa6wXJbwGYr*dkKgws z?Y1-Zf7`JSmd63b{f8*K;&U-dmQybBshxPc(Y-F3m}--KO-9!=4{!~-@2-gt*BHG& zHwk(}2+m&sd$4F5C@I`$GGprpnSjIRBMpUSR_pAl*Ofv8EM>x){1Gj`9aI{Y{km&R zvldW;9{|e@uFRxAnpjg}d$1f+%Y{D-?O5(Bk?d4hbvC}`?nkB?GLkY&;EazUX44)X z)362A?G_t62#uHUTk=fKZFa}LUaU;05G2DR3SIM`IkCAO}Q$ND%4z$uU6(R~`Z=QY$2b+>sg!hXd0~&?M zxAdA*jijq0W?$`^IHqQs4MBz4YlkVL&4E#lsl)BJb+mp;;#lC&oA*BCHYhv;>Y=i# zVg0us0W<+h@=T7c=lR+M^$WA3}^lpPLUPW_I%e@~s%y|6Va8R4{@51uR#1i$wh{Q8>M_Ax!2%?X}AoYwX%Z{!h8WKlD5mIm$|=g0Nj_@TYR)67lZv_c>W zS!^?@F;xfUpe6-Mz~RO_Lei0B=)aADmVxLDc%&}f&ov}2sbJ)8eNE}hY52NX^LU|C zBR^GX7bJSxhC|Py1wN05NMgQ|B~)vb_Ks#9STx1x^paI}W7|q_%_v&H9o)F7wUYFB z%?(vjDLoJ%9Gj062jc^_ea}&)?Luxrcu5Obm1n1FtklTt4RiAP-PlhV9L=>{fUYAi zT9Jlbh||k+zryxs3gIrd*#NU`LBY}RtADiAN1Cfd=FWF$OJ}Q=06ia%5ZNOX5P2Ab z82n;EY0^p=z@H+>PQt?-g07bAgb`DCS=%-Q>vo=ep9$C1=#>)PU(r3v0HVCPuB@B$ zHIRvDTY`LhX}g#s(}KP}%T^V`N3@^6N(_x~6g}(2BP8x{yMkWV@?2>qIs!|`=XHM- znd`9`lQ)lIj{Y`3{JwyG`hxedD-e9VTwycLcAAQ?j5zveHD0mkGAd@2(fKg4w7WrJ6qMU`MC$iFRCiVro(&QAJ+9Yo1zfD5~SIfs+51K&o$b zxxpyh=W^W?U_)cA$Q_LmTa<9XJM8U<#R*1yN%lubqtrev zs4;VEtQA)9t5o?lGRO;!9~MHPQ{o2Jl$;ILK`gsM1nF~+_NjpPLu6vmKlIb_l8@Od z1pJtm0*6>{r2A9j4nJi6(mmwM_&mBVoCPQkJ%x5cFsj~z&!qv^1~4Mwvr+ol+v~#X zBlHzJk(*_qV2C^Xe~b){rol%g1X=WR;KEeg{V94}rm2LS7qa$HZeyfv(&1ow)8kKS zv7u7djy(~xQLBokheHJWb@8^#LC(r&#*6#MBdap{1>vrZ*S!gP^j#{(ur}%N0&~Wi zOo+?l7&JsktR3}m#E;vO7Z*m7Q><)MkH>iH-j9Z7vw((*0PL zw3-*>VE8IkQo#mrs53YH?n0 zo&9okGD!TrO$dB0L$Iw*lhcLaCOG-@BZ?Hrmgh9!3)Do|{RM^z)B+r^FIW4OUW;fH=Wjk0+{^#IEX3BQAE9lZ$`TG zI&j`l=USPA7c%Bay3D7zDO%ruiWN!37%syK{96V#)+O z<_`RWZ)_rE+H#&z{XkccDdfnPUHQu}SHLIxe|^tTGr6?K^tMCC68jABjmMtIT+lQrb{B1? z*nHk|%X{Sp+y-J={eK0|rcfN^=pE;hO3}BM<`AijELg|RU8u#5?zskV8al8%Xast# zBL4i7v>52C>%6;4S9^0IKb^iQ1^>!%b}Zgu3QRfh=`^ zt;7WRa3`)JNb5_^a(Ju#OSe4RDig>?`n4?e&GreO4K7QKJqKcimn>9f-9rGVx-$Xf zAc6sYPQ4^z(WpF+VtTA92RCsT#o>@|aQ^+K4Gq*toBmhtMs4X_F$rd@upyg2)nWh_ zP6*rxV-lr9lE#I?W#L=tF+y%pb^yU)-?&Kq@K%3;kKE_zmLor_lnbwt_h`RAzioWO zo>k|XRC(R~GpUV6>;dQD$q?NU&nqaDLg(-9XtWo65CsErFV@v3wV29veU^QI>fXu< zoc{P|H?wY`CT3UO%M5B0jP;g-IhAJjvnVUL ztRr2YV3o@n81Tg*+~@V!Tw{TW;R>HFp=;{Fbq-KiSe>3jQbFS=<#T{*tHKjRX&{xQD|Na`*Vg`*O*l`u6M2{#$cCjR_iOssk1%ceb3ukIuPQCEFQpQr0ALTkc8Ul z5b3lUrooeMD$k%;Q0-MXlllX2^DtB!@q^5JDm?h22xd-tB5O z&o=0v+E{&n``m}2TC+-XI-1;~!?<)wvOS(1RbD4*grpjAIt0^WY=m z*wq@RjD2FD{zc&1bwy9r7LFd6-)+#~oyS9%%28y7_SIg{!sN55n!t#Y?hRoP~ge0RSjV5=qh@Om1sJ zEM)EFAsD{Xc&LlrTWD@;Y;|Z(j$#vFvJ)9-q@EQGB?@& zJ{jfBy`35aQF$j~XnQa62``Dm91S|EWK(vaM4LI&&|>tG;{~$NG^j$?F7%{iIo%n^ z>onD4B7arJr%T3~LQ+EHPBcY0{^-kg&>_~KfAgHH1tBb|g5|uA^LDcZkTJ;;#&Q`W z35!N)gh#MgD+ypCp88t9T>A`cUN2!XnJ45;e(>MO9Ow>DTIGHanunp%Ll(x_qYv^f zYxOK;kyP@A?s>5L5r9Z^N5l6}xH@vf{4URIK%m3I`U4+SI4f9ZQgt>twy*NV+l6GW z%+1*%9#6`FT|zImO`H`T2)i1l1qEb4!0h_dYgA>z;y06iIu{95g!Zn;y87vAvbZL) z(8fZ&b^97@pCM^=dQVP-({EVTcOysU({wl=VV zcOsS;0riYi{4uABEG4T7-IxZUF2J|u{8W%P7g8^Mi5qwOw6PHVoW@Svx_fdzDrEO~ zI)@5MRIv*y=8t2B%q3^co<1QxgmY9SExZ;ty|}yOi%W_UWskE08+Bhh(M&IWmBkt_ z7Ky*alBM0|sEMO^r8UE(H!BJC6r_rypaYL7!{IN_XNWLosNLJ`9ft_YWE8Yw-q|$A z5}lc>WQ*~eK4rl>3I>U{Az<_OGNSM*FJN>&iLl>zC)HWUxT#O=A}jr|7kD_o3t}dP zFc||@c&EM|9B*CYI}gf;>H>M-UHA(GtiD2|CWMcA2P!^AaW!De>)=Y(mOxzB8djAU zfma-s9pE_d=Yn?>`uVqsI$MG_^iRd`=I z8Z^fMB`vysh)c?p%!`%IPM{xscKY}?@^;>oSIE#`px_rs%nJGm!hyKG;=W_NmcFSU zJWqXD&qoCYKx7WL&96Y*jR)gUjz(Ay%9dhqST~TvV|;p)R&4LBeTTt~k)V&u_M>_v zxQ4gM#vI;M@^q8nAY3*9hTG8aY%T6UG;swOn}=fqnN?MAwZt*+H?^6*LRt8Sa{pNy zLSt~8=VjHnX=XQN(vTyYEqe~X(t~W=Yk!P9wt=Oaf^tM|f{@_OO)R){1Y4S|Upz^) zA_&{j0#j|=>{D7pPSft1gKx1G4Gewhd|pyxOU(7yHbfDqZNp<0BA!=&47oItphY2DsL86{9fJ+r)OyR>_FNsp@5j6`Dw$v5XmL9C1_!B+I{>xzO8t zh8TGyyHWxXoNO5|+G-^nYi^${UqO?Yb}3~0oy8WUWdOc5+wTGI9zFMAYo7t*+XM%p zDQvUJpHdzgV0W*e#;LGm?YA&CUT&)Mw2EE7pZD9=c2;+7?_0;W{3^cUlsd`8&oH{+ z8F8_pqt9s=(jXiZKSTMGMpfVuQ9t$op7=N4X)90NiutwFidEEen?Y&Ad=3Y`^S`CD?g)YzRtxC!vKGLndmZAhStT3wIuZo^@uml~!d{R1U9 zwuH5D-pc#{#LruSO2yB=8lz1hiP$x6v^(numX7hb0;K|vJ%;Dd(~L20$6&~d*ibWIaaK4l##(UDOF|k zD@U`gQxffIOQ@6U{|SwRv2}M0%pGAu!UivlXF%U;vPmxZ!{3);<0BW;+FrcH-^q`d zotO{YF!5;y7(0)sul8&2jOw}Bm=~=XIj=aBLFo2p$p;K;ahgin4Wg3rt8$4r0Yj#Q zgWY)(AV*(Tfp=J(lxQHfU=PaghmoF_;3%M8@vUp$Gr2eN?U;7ZKC zJIq$pz>u(Xf3Du-FxK;S79HLM{X zj6iA&1as4|-lUj%yrSO0@=_H<|C$QwxU>hp@fap8nn#EQ-A=$}!ssHmylNAf(3aNQ{$3<<7mq@mdsKA4}bML>os;^Bh~&V_Jh@>0g3n-#?LUgB&W0hJOs=bGt@9hYAD^tDRwjtV(U%CD*0ys>|92IZAAB`nNG!0 zE_ZqZ4B`yvXQT%)hlOM@AW@NgQx`0vyFPmRF0>D6sPN!~G}KzUpsTzMfoX0pbcusF zsG3Kow8DGhmM90bZoy%t#4rb6;Ldr`C@Cy6CqiBT_t0Fsg5x9fSxhEe^bdbji;t+D ziS?=Aer+rYkUqAHUwor?YGGJfP#0~tPu(W7oe$bU7=o;y=^rdC2XVoax*J>he+E|e|VZr~tE>did z+fw6Mz!K>Js{;6AWmuKCB;Lsjb+4x7Ah(VlB>|Xbl>DVA3|XVxq3qudo_2QW^~42pfs z75{Qs;QE$w0<$euKm3{+uLp@)cXMq%Q#~lR1>o~m&dpDCS{NGy0}wEpfK`-}qo!E6 zF!fmZ#RTW%>vc}wx7@13q}VkuN!(IPe~p+!zd40Q)XyiJk6EfV4Gy~{9ZyQBnCyq7 zy(AD*B5t8mKx)L9a)uj_t z+j^3BvHI3AwHz~q@o1F$qxz;mZuT;L0mnI=3z2L87SxpdTCXr+uWntP6IJ8I0C^bk z$LRRLTLDrw8IqEphJBW43Wph$FM7NICXph=Yri#`fNu=@rx_-(!ed3etf?S|?a;PV zR0I$016h zV>0e`qn7_W$@v=Q0?*p9MZNrQ4Wlpa&MB&-lbn(`6yh@g_`;@Mo;bULvNa!Ahr4gl z7>v0)_xx&^q942+z11it)CI44;kV!4R1=y-)sC8vl{{doKI6F{2etnOCjG#Utgax% zu0sxh>J_m-I6kn~WMw*jaUHqvpc{c=!R;Y+=Q>~hA$$%u(TZ-_Xy?+LURs~Uakloj zg&TB(1eF+g2Jdxmj@SpN(WVHPgdN-}Ed7Ctcw@ce;%vo1k0u_OivI+~s~@VQXDesD z`@m8uk+52cRS!49Pm7`+Smjg=l$)f3TS*%&kuTEqb~^gt3R8nvF2N($gt5{ph9j;} z;T}74ulS~^B>i4toBoJ!?Wz=MB@xE5PZ{KuevOWCu{|{oY@zvhVc&{%R1qZV>ol)| zjfd?Ap<)>&F%h61$2D!4C0YN+GvGZke6qJXqeu-9Kil;$E*;rsp@)7igO^Dq{D*t zYE`V9+DD01{h53kv=C4O=|DL?GfJK@qiv88=Cl*DdSmP`IEN(cO&RjG>8}TIdmts^ z*Ej$GH^Tnv1Zmce@F)EP(|A1R#@*gbHb-nDO(rfsa_}yhd&=0mQ;3);zE4JvD#P5v zO7_!OJ9cSjraLF%>-{CApNet5`Yee%*(AEV|KS$0d!dw2GeC6Bk}*koX-J(T#d$6u zItMt(Jq%LJH@ev2@_>h-pV%c1bd%g{mMZ3lW;X=E;E=g{iJ-foU?`&kDf0Df9l9-q zBKw0|1FwbQeVYA)B;0Sy%foV#px-A|uJh~^ZBTbhW;Voqbhevf0z)4BZ-29Q?JU)Jj@_6DY>0wb zS9x3(IZREtHsXI@xFpP0kxf6MsJ^@28?ZOW0lF4@QHLhkZ8h-x*?Rye!N7(sSQo#P z(k%e_PT$DdYAvFX{jf8ZUuMBkrr~xUvxHuR_(+NfN0xeQ7}LfqBm8)_QF-5GNUv4NwdiyknH0)iOz(4C>F$wu;dOJLLn| z0^|7^kIOq9i0*VCm6V~%HIN=g+rSRu96yA{+8;6OP(+gk!AJ`c%b{ZuP!us;^tvw9 zhC_mk?-Sf4P*na*yD;d@VyB`6VIq$Zq6|*9EZGw%d<@D+6QwpGnTk~9Yv@9~%?y~M zWpMX?lQzL<#d|93o8kaXExNXEZc4X7Xm}ewM}P*Ca3l|fTR-4kkY+bS{*Ihf<@2qr{K?!&b4KEtjNc3$=}Hj_91+P$RyCy!|6#j{b8gDzx}?aI z-P`IXfwg*ILEiHcJZD)G%B1tf7sk@eR`PMEsUjsdPwCL?C&EhGjpzf)BTI7vK^MrJ zEi>DLRB;2>&3V93om0x=LDzGIcJ0qEyE-2?Y-%s;VKl2P-1U*XV+x7C0cZ0#ePSs+ zWwatUsxUzi29J_tX(uH4>=YjmP!|Zl;h(m{L&l3F;ga`x3TP_37OOKOMie7Ip}Uji z#$U}^6l=~JqcJ=-ld2+xxKGZCdo%PIrA8V9Rsd$FK_D?HG!j8Gfsb}0>Rd!fFVf*l zj+G=53A;^WRDtN~-z^120WlCBV^_a`3{S-6Bw(;uEph-J5kyfSJV)pKDFU-`@@YOo zi`*xg!G4$??Q?)`7=I}87%g~(Cpk4F>N@qk7{aX)kEX^ZoF6>ZL6T@@Qyc59*!_`= zOGxD6zF@4bmLSv2n*BWqlp&a(fYnU|{-5m*0jU61|6LG9X$!7=(<#e}bb+LLq98;f z)!8m+rGZLNWlt3!xkdxXZUjHeZp?a~MYsruA6x@y%aS*kdnvJPCA^8!iL>+Prf>bqFQC=i7Q#kYuSCCw+~A7x0I-Sg!~4e3BnPWgeR0 x7Ghcn+lr&wXo?PLFc`WKXma?a`$In{A^Kzr&(!`th%<*ENn59k>m%K6d;ng?Jh=b> literal 0 HcmV?d00001 From 8b383062168b77e49cc9dae108a2972a373de10a Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 8 Sep 2022 17:04:53 -0400 Subject: [PATCH 42/76] Use the Interop Test files from samtools-1.14/htslib-1.14/htscodecs/tests or ../htscodecs/tests depending on the env --- scripts/install-samtools.sh | 3 +- .../htsjdk/samtools/cram/RANSInteropTest.java | 38 ++++++++----------- .../samtools/cram/RANSInteropTestUtils.java | 16 +------- .../java/htsjdk/utils/SamtoolsTestUtils.java | 11 +++++- 4 files changed, 29 insertions(+), 39 deletions(-) diff --git a/scripts/install-samtools.sh b/scripts/install-samtools.sh index 84feb30700..e847b9af79 100755 --- a/scripts/install-samtools.sh +++ b/scripts/install-samtools.sh @@ -1,5 +1,6 @@ #!/bin/sh set -ex wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1.14.tar.bz2 +# CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodes/tests/dat tar -xjvf samtools-1.14.tar.bz2 -cd samtools-1.14 && ./configure --prefix=/usr && make && sudo make install +cd samtools-1.14 && ./configure --prefix=/usr && make && sudo make install \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index c345ef9f58..59201e22c7 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -33,14 +33,6 @@ * so it can be shared across htslib/samtools/htsjdk. */ public class RANSInteropTest extends HtsjdkTest { - @Test - public void testGetHTSCodecsCorpus() throws SkipException{ - if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { - throw new SkipException(String.format( - "No RANS Interop test data found at location: %s", - RANSInteropTestUtils.INTEROP_TEST_FILES_PATH)); - } - } ///////////////////////////////////////////////////////////////////////////////////////////////// // RANS tests @@ -206,19 +198,25 @@ public Object[][] getAllRansCodecs() throws IOException { .toArray(Object[][]::new); } + // TODO: testHtslibVersion should depend on SamtoolsTestUtilsTest.testSamtoolsVersion + @Test(description = "Test if CRAM Interop Test Data is available") + public void testGetHTSCodecsCorpus() { + if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { + throw new SkipException(String.format("RANS Interop Test Data is not available at %s", + RANSInteropTestUtils.INTEROP_TEST_FILES_PATH)); + } + } + @Test ( - dataProvider = "allRansCodecsAndData", dependsOnMethods = "testGetHTSCodecsCorpus", + dataProvider = "allRansCodecsAndData", description = "Roundtrip using htsjdk RANS. Compare the output with the original file" ) public void testRANSRoundTrip( final Path inputTestDataPath, final RANSEncode ransEncode, final RANSDecode ransDecode, final RANSParams params, - final String unusedCompressedDirname) throws IOException, SkipException { - if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { - throw new SkipException("Interop test data is not available locally"); - } + final String unusedCompressedDirname) throws IOException { try (final InputStream is = Files.newInputStream(inputTestDataPath)) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) @@ -236,8 +234,8 @@ public void testRANSRoundTrip( } @Test ( - dataProvider = "allRansCodecsAndData", dependsOnMethods = "testGetHTSCodecsCorpus", + dataProvider = "allRansCodecsAndData", description = "Compress the original file using htsjdk RANS and compare it with the existing compressed file. " + "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.") public void testRANSPreCompressed( @@ -245,10 +243,7 @@ public void testRANSPreCompressed( final RANSEncode unused, final RANSDecode ransDecode, final RANSParams params, - final String compressedInteropDirName) throws IOException, SkipException { - if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { - throw new SkipException("Interop test data is not available locally"); - } + final String compressedInteropDirName) throws IOException { final Path preCompressedInteropPath = getCompressedRANSPath(compressedInteropDirName,uncompressedInteropPath, params); @@ -269,16 +264,15 @@ public void testRANSPreCompressed( Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); } catch (NoSuchFileException ex){ throw new SkipException("Skipping testRANSPrecompressed as either input file " + - "or precompressed file is missing. File Missing: " + ex.getMessage()); + "or precompressed file is missing.", ex); } } // return a list of all RANS test data files in the InteropTest/RANS directory - private List getInteropRANSTestFiles() throws IOException, SkipException { - RANSInteropTestUtils.assertHTSCodecsTestDataAvailable(); + private List getInteropRANSTestFiles() throws IOException { final List paths = new ArrayList<>(); Files.newDirectoryStream( - RANSInteropTestUtils.getInteropTestDataLocation().resolve("RANS"), + RANSInteropTestUtils.getInteropTestDataLocation().resolve("dat"), path -> path.getFileName().startsWith("q4") || path.getFileName().startsWith("q8") || path.getFileName().startsWith("qvar") || diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java index e5e20959c8..79b1b28c44 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java @@ -1,16 +1,16 @@ package htsjdk.samtools.cram; -import org.testng.SkipException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import htsjdk.utils.SamtoolsTestUtils; /** * Interop test data is kept in a separate repository, currently at https://github.com/samtools/htscodecs * so it can be shared across htslib/samtools/htsjdk. */ public class RANSInteropTestUtils { - public static final String INTEROP_TEST_FILES_PATH = "src/test/resources/htsjdk/samtools/cram/InteropTest/"; + public static final String INTEROP_TEST_FILES_PATH = SamtoolsTestUtils.getCRAMInteropData(); /** * @return true if interop test data is available, otherwise false @@ -20,18 +20,6 @@ public static boolean isInteropTestDataAvailable() { return Files.exists(testDataPath); } - /** - * @return throws a SkipException if the interop test data is not available locally - */ - public static void assertHTSCodecsTestDataAvailable() { - if (!isInteropTestDataAvailable()) { - throw new SkipException( - String.format( - "No RANS Interop test data found at location: %s", - INTEROP_TEST_FILES_PATH)); - } - } - /** * @return the name and location of the local interop test data as specified by the * variable INTEROP_TEST_FILES_PATH diff --git a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java index f70674d8cd..c144362250 100644 --- a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java +++ b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java @@ -1,7 +1,8 @@ package htsjdk.utils; -import htsjdk.samtools.util.*; - +import htsjdk.samtools.util.FileExtensions; +import htsjdk.samtools.util.ProcessExecutor; +import htsjdk.samtools.util.RuntimeIOException; import java.io.File; import java.io.IOException; import java.nio.file.Files; @@ -14,6 +15,7 @@ public class SamtoolsTestUtils { private static final String SAMTOOLS_BINARY_ENV_VARIABLE = "HTSJDK_SAMTOOLS_BIN"; public final static String expectedSamtoolsVersion = "1.14"; + public final static String expectedHtslibVersion = "1.14"; /** * @return true if samtools is available, otherwise false @@ -47,6 +49,11 @@ public static String getSamtoolsBin() { return samtoolsPath == null ? "/usr/local/bin/samtools" : samtoolsPath; } + public static String getCRAMInteropData() { + final String samtoolsPath = System.getenv(SAMTOOLS_BINARY_ENV_VARIABLE); + return samtoolsPath == null ? "../htscodecs/tests" : samtoolsPath + "/htslib-"+expectedHtslibVersion+"/htscodecs/tests"; + } + /** * Execute a samtools command line if a local samtools executable is available see {@link #isSamtoolsAvailable()}. * From cec6e3e536242494dfe07c02a3adc6342c2587c5 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 8 Sep 2022 17:39:10 -0400 Subject: [PATCH 43/76] Replace hex literals with bit flag masks in RANSInteropTest Data Provider --- .../compression/rans/RansOrder1DemoTest.java | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java new file mode 100644 index 0000000000..1d14b2f590 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java @@ -0,0 +1,122 @@ +package htsjdk.samtools.cram.compression.rans; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params; +import htsjdk.samtools.util.TestUtil; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.nio.ByteBuffer; +import java.util.Random; + +public class RansOrder1DemoTest extends HtsjdkTest { + private Random random = new Random(TestUtil.RANDOM_SEED); + + private static class TestDataEnvelope { + public final byte[] testArray; + public TestDataEnvelope(final byte[] testdata) { + this.testArray = testdata; + } + public String toString() { + return String.format("Array of size %d", testArray.length); + } + } + + + + @DataProvider(name="testDP") + public Object[][] getRansTestData() { + return new Object[][]{ + {new TestDataEnvelope(new byte[]{'h','e','e','e','e','l','l','l','o',})}, + { new TestDataEnvelope(new byte[] {0}) }, + {new TestDataEnvelope(new byte[]{2,101,108,3,2})}, +// + {new TestDataEnvelope(new byte[]{'h','e','e','e','e','e','e','e','e','e','e', + 'l','l','l','l','l','l','l','l','l','l','l','l','o',})}, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(100, 0.1)) }, + { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01))}, // Small + { new TestDataEnvelope(randomBytesFromGeometricDistribution(100 * 1000 + 3, 0.01))} // Large + + }; + } + + private byte[] randomBytesFromGeometricDistribution(final int size, final double p) { + final byte[] data = new byte[size]; + for (int i = 0; i < data.length; i++) { + data[i] = drawByteFromGeometricDistribution(p); + } + return data; + } + + private byte drawByteFromGeometricDistribution(final double probability) { + final double rand = random.nextDouble(); + final double g = Math.ceil(Math.log(1 - rand) / Math.log(1 - probability)) - 1; + return (byte) g; + } + + + @Test(enabled = false,dataProvider = "testDP") + public void testRansNx16BuffersMeetBoundaryExpectations( + final TestDataEnvelope td) { + RANSEncode ransEncode = new RANSNx16Encode(); + RANSDecode ransDecode = new RANSNx16Decode(); +// RANSParams ransParams = new RANSNx16Params(0x40);// format = 64 (rle = 1, order = 0) +// RANSParams ransParams = new RANSNx16Params(0x41);// format = 65 (rle = 1, order = 1) + RANSParams ransParams = new RANSNx16Params(0x40);// format = 128 (pack = 1, order = 0) +// +// RANSParams ransParams = new RANSNx16Params(0x00); + +// // if we comment f++, t++ then this fails as expected with Buffer Underflow Exception +// // Next step -> make Xmax and dependent variables to long and try +// RANSEncode ransEncode = new RANS4x8Encode(); +// RANSDecode ransDecode = new RANS4x8Decode(); +// RANSParams ransParams = new RANS4x8Params(RANSParams.ORDER.ZERO); + + ByteBuffer inputData = ByteBuffer.wrap(td.testArray); + final ByteBuffer outBuffer = ransEncode.compress(inputData,ransParams); + + ByteBuffer uncompressed = ransDecode.uncompress(outBuffer); + // TODO: where is comp Flag -> freq first byte being written?? + inputData.rewind(); + Assert.assertEquals(inputData,uncompressed); + } + + + @Test(enabled = false,dataProvider = "testDP") + public void testRansNx16Tiny( + final TestDataEnvelope td) { + RANSEncode ransEncode = new RANSNx16Encode(); + RANSDecode ransDecode = new RANSNx16Decode(); + RANSParams ransParams = new RANSNx16Params(0x05); +// ByteBuffer inputData = ByteBuffer.wrap(td.testArray); +// +// final ByteBuffer outBuffer = ransEncode.compress(inputData,ransParams); +// +// ByteBuffer uncompressed = ransDecode.uncompress(outBuffer); +// inputData.rewind(); +// Assert.assertEquals(inputData,uncompressed); + final ByteBuffer in = ByteBuffer.wrap(td.testArray); + for (int size = 1; size < 100; size++) { + in.position(0); + in.limit(size); + final ByteBuffer compressed = ransEncode.compress(in, ransParams); + final ByteBuffer uncompressed = ransDecode.uncompress(compressed); + in.rewind(); + while (in.hasRemaining()) { + if (!uncompressed.hasRemaining()) { + Assert.fail("Premature end of uncompressed data."); + } + Assert.assertEquals(uncompressed.get(), in.get()); + } + Assert.assertFalse(uncompressed.hasRemaining()); + } + } + + +} \ No newline at end of file From 577d5d11895a05be571a0c2cbd355b7aeaacd39a Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 9 Sep 2022 14:15:47 -0400 Subject: [PATCH 44/76] Addressing the feedback so far --- .../cram/compression/rans/RANSParams.java | 2 +- .../rans/ransnx16/RANSNx16Encode.java | 8 +- ...stUtils.java => CRAMInteropTestUtils.java} | 2 +- .../htsjdk/samtools/cram/RANSInteropTest.java | 208 +++++------------- 4 files changed, 63 insertions(+), 157 deletions(-) rename src/test/java/htsjdk/samtools/cram/{RANSInteropTestUtils.java => CRAMInteropTestUtils.java} (96%) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java index d664b24c3a..7d617d5249 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSParams.java @@ -9,7 +9,7 @@ public static ORDER fromInt(final int orderValue) { try { return ORDER.values()[orderValue]; } catch (final ArrayIndexOutOfBoundsException e) { - throw new IllegalArgumentException("Unknown rANS order: " + orderValue); + throw new IllegalArgumentException("Unknown rANS order: " + orderValue, e); } } } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index ba7fe62859..19bd84053d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -11,6 +11,11 @@ import java.nio.ByteOrder; public class RANSNx16Encode extends RANSEncode { + ///////////////////////////////////////////////////////////////////////////////////////////////// + // This is a naive implementation of RANS Nx16 Encoding + // Stripe flag is not implemented in the write implementation + ///////////////////////////////////////////////////////////////////////////////////////////////// + private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); private static final int MINIMUM__ORDER_1_SIZE = 4; @@ -31,11 +36,10 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN Utils.writeUint7(insize,outBuffer); } - // using inputBuffer as inBuffer is declared final ByteBuffer inputBuffer = inBuffer; // Stripe - // Encoding is not implemented for Stripe + // Stripe flag is not implemented in the write implementation if (ransNx16Params.getStripe()) { throw new CRAMException("RANSNx16 Encoding with Stripe Flag is not implemented."); } diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java similarity index 96% rename from src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java rename to src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index 79b1b28c44..046622c84d 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -9,7 +9,7 @@ * Interop test data is kept in a separate repository, currently at https://github.com/samtools/htscodecs * so it can be shared across htslib/samtools/htsjdk. */ -public class RANSInteropTestUtils { +public class CRAMInteropTestUtils { public static final String INTEROP_TEST_FILES_PATH = SamtoolsTestUtils.getCRAMInteropData(); /** diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index 59201e22c7..d16db945bb 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -29,165 +29,67 @@ import java.util.stream.Stream; /** - * Interop test data is kept in a separate repository, currently at https://github.com/samtools/htscodecs - * so it can be shared across htslib/samtools/htsjdk. + * RANSInteropTest tests if the htsjdk RANS4x8 and RANSNx16 implementations are interoperable + * with the htslib implementations. The test files for Interop tests is kept in a separate repository, + * currently at https://github.com/samtools/htscodecs so it can be shared across htslib/samtools/htsjdk. + * + * For native development env, the Interop test files are downloaded locally and made available at "../htscodecs/tests" + * For CI env, the Interop test files are made available from the existing samtools installation + * at "/samtools-1.14/htslib-1.14/htscodecs/tests" */ public class RANSInteropTest extends HtsjdkTest { + public static final String COMPRESSED_RANS4X8_DIR = "r4x8"; + public static final String COMPRESSED_RANSNX16_DIR = "r4x16"; ///////////////////////////////////////////////////////////////////////////////////////////////// // RANS tests ///////////////////////////////////////////////////////////////////////////////////////////////// - // TODO: the TestDataProviders tests fail if the hts codecs corpus isn't available. For time being, - // we fix this by adding some small test files, which would later be replaced by a more permanent - // solution like adding the tests directly from samtools/hts-codecs using git submodule - // RANS4x8 codecs and testdata public Object[][] getRANS4x8TestData() throws IOException { // cache/reuse this for each test case to eliminate excessive garbage collection - final RANS4x8Encode rans4x8Encode = new RANS4x8Encode(); - final RANS4x8Decode rans4x8Decode = new RANS4x8Decode(); + final List rans4x8ParamsOrderList = Arrays.asList( + RANSParams.ORDER.ZERO, + RANSParams.ORDER.ONE); final List testCases = new ArrayList<>(); getInteropRANSTestFiles() - .forEach(p -> - { - // RANS 4x8 order 0 - testCases.add(new Object[] { - p, - rans4x8Encode , - rans4x8Decode, - new RANS4x8Params(RANSParams.ORDER.ZERO), - "r4x8" // htscodecs directory where the RANS4x8 compressed files reside - }); - // RANS 4x8 order 1 - testCases.add(new Object[] { - p, - rans4x8Encode , - rans4x8Decode, - new RANS4x8Params(RANSParams.ORDER.ONE), - "r4x8" // htscodecs directory where the RANS4x8 compressed files reside - }); - }); + .forEach(path -> + rans4x8ParamsOrderList.stream().map(rans4x8ParamsOrder -> new Object[]{ + path, + new RANS4x8Encode(), + new RANS4x8Decode(), + new RANS4x8Params(rans4x8ParamsOrder), + COMPRESSED_RANS4X8_DIR + }).forEach(testCases::add)); return testCases.toArray(new Object[][]{}); } // RANSNx16 codecs and testdata public Object[][] getRANS4x16TestData() throws IOException { - final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); - final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); + final List ransNx16ParamsFormatFlagList = Arrays.asList( + 0x00, + RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.N32_FLAG_MASK, + RANSNx16Params.N32_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.PACK_FLAG_MASK, + RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.STRIPE_FLAG_MASK, + RANSNx16Params.STRIPE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK + ); final List testCases = new ArrayList<>(); getInteropRANSTestFiles() - .forEach(p -> - { - // RANS Nx16 order 0, none of the bit flags are set - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x00), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 0, bitflags = 0x40. rle flag is set - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x40), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 1, bitflags = 0x01 - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x01), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 1, bitflags = 0x04 - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x04), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 1, bitflags = 0x05 - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x05), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 1, bitflags = 0x41. rle flag is set - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x41), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 0, bitflags = 0x80. pack flag is set - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x80), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 1, bitflags = 0x81. pack flag is set - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x81), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 0, bitflags = 0xC0. rle flag is set, pack flag is set - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0xC0), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 1, bitflags = 0xC1. rle flag is set, pack flag is set - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0xC1), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 0, bitflags = 0x08. - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x08), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - // RANS Nx16 order 1, bitflags = 0x09. - testCases.add(new Object[] { - p, - ransNx16Encode, - ransNx16Decode , - new RANSNx16Params(0x09), - "r4x16" // htscodecs directory where the RANSNx16 compressed files reside - }); - - }); + .forEach(path -> + ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ + path, + new RANSNx16Encode(), + new RANSNx16Decode(), + new RANSNx16Params(ransNx16ParamsFormatFlag), + COMPRESSED_RANSNX16_DIR + }).forEach(testCases::add)); return testCases.toArray(new Object[][]{}); } @@ -201,9 +103,9 @@ public Object[][] getAllRansCodecs() throws IOException { // TODO: testHtslibVersion should depend on SamtoolsTestUtilsTest.testSamtoolsVersion @Test(description = "Test if CRAM Interop Test Data is available") public void testGetHTSCodecsCorpus() { - if (!RANSInteropTestUtils.isInteropTestDataAvailable()) { + if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { throw new SkipException(String.format("RANS Interop Test Data is not available at %s", - RANSInteropTestUtils.INTEROP_TEST_FILES_PATH)); + CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH)); } } @@ -212,23 +114,23 @@ public void testGetHTSCodecsCorpus() { dataProvider = "allRansCodecsAndData", description = "Roundtrip using htsjdk RANS. Compare the output with the original file" ) public void testRANSRoundTrip( - final Path inputTestDataPath, + final Path uncompressedInteropPath, final RANSEncode ransEncode, final RANSDecode ransDecode, final RANSParams params, final String unusedCompressedDirname) throws IOException { - try (final InputStream is = Files.newInputStream(inputTestDataPath)) { + try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath)) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(is))); + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); // If Stripe Flag is set, skip the round trip test as encoding is not implemented for this case. if ((params.getFormatFlags() & RANSNx16Params.STRIPE_FLAG_MASK)==0) { - final ByteBuffer compressedBytes = ransEncode.compress(uncompressedBytes, params); - uncompressedBytes.rewind(); - Assert.assertEquals(ransDecode.uncompress(compressedBytes), uncompressedBytes); + final ByteBuffer compressedHtsjdkBytes = ransEncode.compress(uncompressedInteropBytes, params); + uncompressedInteropBytes.rewind(); + Assert.assertEquals(ransDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); } } } @@ -262,7 +164,7 @@ public void testRANSPreCompressed( // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); - } catch (NoSuchFileException ex){ + } catch (final NoSuchFileException ex){ throw new SkipException("Skipping testRANSPrecompressed as either input file " + "or precompressed file is missing.", ex); } @@ -272,7 +174,7 @@ public void testRANSPreCompressed( private List getInteropRANSTestFiles() throws IOException { final List paths = new ArrayList<>(); Files.newDirectoryStream( - RANSInteropTestUtils.getInteropTestDataLocation().resolve("dat"), + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), path -> path.getFileName().startsWith("q4") || path.getFileName().startsWith("q8") || path.getFileName().startsWith("qvar") || @@ -304,12 +206,12 @@ final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { } // Given a test file name, map it to the corresponding rans compressed path - final Path getCompressedRANSPath(final String ransType,final Path inputTestDataPath, RANSParams params) { + final Path getCompressedRANSPath(final String ransType,final Path uncompressedInteropPath, RANSParams params) { // Example compressedFileName: r4x16/q4.193 // the substring after "." in the compressedFileName is the formatFlags (aka. the first byte of the compressed stream) - final String compressedFileName = String.format("%s/%s.%s", ransType, inputTestDataPath.getFileName(), params.getFormatFlags()); - return inputTestDataPath.getParent().resolve(compressedFileName); + final String compressedFileName = String.format("%s/%s.%s", ransType, uncompressedInteropPath.getFileName(), params.getFormatFlags()); + return uncompressedInteropPath.getParent().resolve(compressedFileName); } } \ No newline at end of file From 2db08787bcdbac76b4451bf8e87348d0f9c8ec86 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 9 Sep 2022 15:02:23 -0400 Subject: [PATCH 45/76] debug CI test failure --- scripts/install-samtools.sh | 2 ++ src/test/java/htsjdk/utils/SamtoolsTestUtils.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/install-samtools.sh b/scripts/install-samtools.sh index e847b9af79..e1964350f9 100755 --- a/scripts/install-samtools.sh +++ b/scripts/install-samtools.sh @@ -3,4 +3,6 @@ set -ex wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1.14.tar.bz2 # CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodes/tests/dat tar -xjvf samtools-1.14.tar.bz2 +echo "print current dir" +pwd cd samtools-1.14 && ./configure --prefix=/usr && make && sudo make install \ No newline at end of file diff --git a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java index c144362250..8b6d68d569 100644 --- a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java +++ b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java @@ -51,7 +51,7 @@ public static String getSamtoolsBin() { public static String getCRAMInteropData() { final String samtoolsPath = System.getenv(SAMTOOLS_BINARY_ENV_VARIABLE); - return samtoolsPath == null ? "../htscodecs/tests" : samtoolsPath + "/htslib-"+expectedHtslibVersion+"/htscodecs/tests"; + return samtoolsPath == null ? "../htscodecs/tests" : "/samtools-"+expectedHtslibVersion+ "/htslib-"+expectedHtslibVersion+"/htscodecs/tests"; } /** From c8fb5500e4353d3c8108bbb52d36a2cab7715a04 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 13 Sep 2022 16:14:15 -0400 Subject: [PATCH 46/76] Fix the htscodecs path --- scripts/install-samtools.sh | 2 -- src/test/java/htsjdk/utils/SamtoolsTestUtils.java | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/install-samtools.sh b/scripts/install-samtools.sh index e1964350f9..e847b9af79 100755 --- a/scripts/install-samtools.sh +++ b/scripts/install-samtools.sh @@ -3,6 +3,4 @@ set -ex wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1.14.tar.bz2 # CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodes/tests/dat tar -xjvf samtools-1.14.tar.bz2 -echo "print current dir" -pwd cd samtools-1.14 && ./configure --prefix=/usr && make && sudo make install \ No newline at end of file diff --git a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java index 8b6d68d569..64ea493cdd 100644 --- a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java +++ b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java @@ -51,7 +51,7 @@ public static String getSamtoolsBin() { public static String getCRAMInteropData() { final String samtoolsPath = System.getenv(SAMTOOLS_BINARY_ENV_VARIABLE); - return samtoolsPath == null ? "../htscodecs/tests" : "/samtools-"+expectedHtslibVersion+ "/htslib-"+expectedHtslibVersion+"/htscodecs/tests"; + return samtoolsPath == null ? "../htscodecs/tests" : "./samtools-"+expectedHtslibVersion+ "/htslib-"+expectedHtslibVersion+"/htscodecs/tests"; } /** From d51f9ce72a41244944613f3f5609eae6f778d1a4 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 30 Sep 2022 20:25:51 -0400 Subject: [PATCH 47/76] rename methods that return boolean to start with 'is' instead of 'get' --- .../compression/rans/ransnx16/RANSNx16Decode.java | 14 +++++++------- .../compression/rans/ransnx16/RANSNx16Encode.java | 10 +++++----- .../compression/rans/ransnx16/RANSNx16Params.java | 10 +++++----- .../samtools/cram/compression/rans/RansTest.java | 8 ++++---- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index d784c9413c..a674c3758d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -33,10 +33,10 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); // if nosz flag is set, then uncompressed size is not recorded. - outSize = ransNx16Params.getNosz() ? outSize : Utils.readUint7(inBuffer); + outSize = ransNx16Params.isNosz() ? outSize : Utils.readUint7(inBuffer); // if stripe, then decodeStripe - if (ransNx16Params.getStripe()) { + if (ransNx16Params.isStripe()) { return decodeStripe(inBuffer, outSize); } @@ -44,7 +44,7 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { int packDataLength = 0; int numSymbols = 0; int[] packMappingTable = new int[0]; - if (ransNx16Params.getPack()){ + if (ransNx16Params.isPack()){ packDataLength = outSize; numSymbols = inBuffer.get() & 0xFF; @@ -65,7 +65,7 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { int uncompressedRLEOutputLength = 0; final int[] rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; ByteBuffer uncompressedRLEMetaData = null; - if (ransNx16Params.getRLE()){ + if (ransNx16Params.isRLE()){ uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); uncompressedRLEOutputLength = outSize; outSize = Utils.readUint7(inBuffer); @@ -74,7 +74,7 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { } // If CAT is set then, the input is uncompressed - if (ransNx16Params.getCAT()){ + if (ransNx16Params.isCAT()){ byte[] data = new byte[outSize]; inBuffer.get( data,0, outSize); return ByteBuffer.wrap(data); @@ -95,12 +95,12 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { } // if rle, then decodeRLE - if (ransNx16Params.getRLE() && uncompressedRLEMetaData!=null ){ + if (ransNx16Params.isRLE() && uncompressedRLEMetaData!=null ){ outBuffer = decodeRLE(outBuffer,rleSymbols,uncompressedRLEMetaData, uncompressedRLEOutputLength); } // if pack, then decodePack - if (ransNx16Params.getPack() && packMappingTable.length > 0) { + if (ransNx16Params.isPack() && packMappingTable.length > 0) { outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } return outBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index 19bd84053d..f87bd3be43 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -30,7 +30,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // TODO: add methods to handle various flags // NoSize - if (!ransNx16Params.getNosz()) { + if (!ransNx16Params.isNosz()) { // original size is not recorded int insize = inBuffer.remaining(); Utils.writeUint7(insize,outBuffer); @@ -40,12 +40,12 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // Stripe // Stripe flag is not implemented in the write implementation - if (ransNx16Params.getStripe()) { + if (ransNx16Params.isStripe()) { throw new CRAMException("RANSNx16 Encoding with Stripe Flag is not implemented."); } // Pack - if (ransNx16Params.getPack()) { + if (ransNx16Params.isPack()) { final int[] frequencyTable = new int[Constants.NUMBER_OF_SYMBOLS]; final int inSize = inputBuffer.remaining(); for (int i = 0; i < inSize; i ++) { @@ -69,12 +69,12 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN } // RLE - if (ransNx16Params.getRLE()){ + if (ransNx16Params.isRLE()){ inputBuffer = encodeRLE(inputBuffer, ransNx16Params, outBuffer); } - if (ransNx16Params.getCAT()) { + if (ransNx16Params.isCAT()) { // Data is uncompressed outBuffer.put(inputBuffer); outBuffer.limit(outBuffer.position()); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index 023b7ad06d..6591292646 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -45,27 +45,27 @@ public int getNumInterleavedRANSStates(){ return ((formatFlags & N32_FLAG_MASK) == 0) ? 4 : 32; } - public boolean getStripe(){ + public boolean isStripe(){ // multiway interleaving of byte streams return ((formatFlags & STRIPE_FLAG_MASK)!=0); } - public boolean getNosz(){ + public boolean isNosz(){ // original size is not recorded (for use by Stripe) return ((formatFlags & NOSZ_FLAG_MASK)!=0); } - public boolean getCAT(){ + public boolean isCAT(){ // Data is uncompressed return ((formatFlags & CAT_FLAG_MASK)!=0); } - public boolean getRLE(){ + public boolean isRLE(){ // Run length encoding, with runs and literals encoded separately return ((formatFlags & RLE_FLAG_MASK)!=0); } - public boolean getPack(){ + public boolean isPack(){ // Pack 2, 4, 8 or infinite symbols per byte return ((formatFlags & PACK_FLAG_MASK)!=0); } diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index c8721f669f..9343f36158 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -230,13 +230,13 @@ public void testRansNx16BuffersMeetBoundaryExpectations( numSym++; } } - if (params.getPack() & (numSym == 0 | numSym > 16)) { + if (params.isPack() & (numSym == 0 | numSym > 16)) { Assert.assertEquals(FormatFlags, params.getFormatFlags() & ~RANSNx16Params.PACK_FLAG_MASK); } else { Assert.assertEquals(FormatFlags, params.getFormatFlags()); } // if nosz flag is not set, then the uncompressed size is recorded - if (!params.getNosz()){ + if (!params.isNosz()){ Assert.assertEquals(Utils.readUint7(compressed), size); } } @@ -279,13 +279,13 @@ public void testRansNx16Header( numSym++; } } - if (params.getPack() & (numSym == 0 | numSym > 16)) { + if (params.isPack() & (numSym == 0 | numSym > 16)) { Assert.assertEquals(FormatFlags, (byte) (params.getFormatFlags() & ~RANSNx16Params.PACK_FLAG_MASK)); } else { Assert.assertEquals(FormatFlags, (byte) params.getFormatFlags()); } // if nosz flag is not set, then the uncompressed size is recorded - if (!params.getNosz()){ + if (!params.isNosz()){ Assert.assertEquals(Utils.readUint7(compressed), size); } } From 789f50c5aa07e8a610bc70497ecd04bc6ded6077 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 17 Oct 2022 16:08:30 -0400 Subject: [PATCH 48/76] debug --- .../cram/compression/rans/ransnx16/RANSNx16Encode.java | 4 +--- .../cram/compression/rans/ransnx16/RANSNx16Params.java | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index f87bd3be43..562c7415a4 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -24,11 +24,9 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN return EMPTY_BUFFER; } final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining()); - final int formatFlags = ransNx16Params.getFormatFlags() & 0xFF; + final int formatFlags = ransNx16Params.getFormatFlags(); outBuffer.put((byte) (formatFlags)); // one byte for formatFlags - // TODO: add methods to handle various flags - // NoSize if (!ransNx16Params.isNosz()) { // original size is not recorded diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index 6591292646..ea17b23d51 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -36,7 +36,7 @@ public ORDER getOrder() { } public int getFormatFlags(){ - // Least significant 7 bits of the format + // first byte of the encoded stream return formatFlags & FORMAT_FLAG_MASK; } From 6b0c4f1c9086f5923481eaf0907e39c16b209f0d Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Mon, 31 Oct 2022 16:20:22 -0400 Subject: [PATCH 49/76] Addressing the feedback from 10/25/22 --- .../rans/rans4x8/RANS4x8Params.java | 5 +- .../htsjdk/samtools/cram/RANSInteropTest.java | 77 +++++++---- .../compression/rans/RansOrder1DemoTest.java | 122 ------------------ .../cram/compression/rans/RansTest.java | 118 +++++++---------- .../java/htsjdk/utils/SamtoolsTestUtils.java | 2 +- 5 files changed, 105 insertions(+), 219 deletions(-) delete mode 100644 src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java index 024ae37d53..beab9fe6c7 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; import htsjdk.samtools.cram.compression.rans.RANSParams; +import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params; public class RANS4x8Params implements RANSParams { @@ -21,7 +22,9 @@ public ORDER getOrder() { } public int getFormatFlags(){ - return order.ordinal(); + return order == ORDER.ONE ? + RANSNx16Params.ORDER_FLAG_MASK : + 0; } } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index d16db945bb..b1c9d5a887 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -41,13 +41,12 @@ public class RANSInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANS4X8_DIR = "r4x8"; public static final String COMPRESSED_RANSNX16_DIR = "r4x16"; - ///////////////////////////////////////////////////////////////////////////////////////////////// - // RANS tests - ///////////////////////////////////////////////////////////////////////////////////////////////// - // RANS4x8 codecs and testdata public Object[][] getRANS4x8TestData() throws IOException { - // cache/reuse this for each test case to eliminate excessive garbage collection + + // params: + // uncompressed testfile path, RANS encoder, RANS decoder, + // RANS params, compressed testfile directory name final List rans4x8ParamsOrderList = Arrays.asList( RANSParams.ORDER.ZERO, RANSParams.ORDER.ONE); @@ -65,7 +64,11 @@ public Object[][] getRANS4x8TestData() throws IOException { } // RANSNx16 codecs and testdata - public Object[][] getRANS4x16TestData() throws IOException { + public Object[][] getRANSNx16TestData() throws IOException { + + // params: + // uncompressed testfile path, RANS encoder, RANS decoder, + // RANS params, compressed testfile directory name final List ransNx16ParamsFormatFlagList = Arrays.asList( 0x00, RANSNx16Params.ORDER_FLAG_MASK, @@ -76,10 +79,30 @@ public Object[][] getRANS4x16TestData() throws IOException { RANSNx16Params.PACK_FLAG_MASK, RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, - RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); + final List testCases = new ArrayList<>(); + getInteropRANSTestFiles() + .forEach(path -> + ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ + path, + new RANSNx16Encode(), + new RANSNx16Decode(), + new RANSNx16Params(ransNx16ParamsFormatFlag), + COMPRESSED_RANSNX16_DIR + }).forEach(testCases::add)); + return testCases.toArray(new Object[][]{}); + } + + public Object[][] getRansNx16DecodeOnlyTestData() throws IOException { + + // params: + // uncompressed testfile path, RANS encoder, RANS decoder, + // RANS params, compressed testfile directory name + + // Stripe is implemented in the Decoder. It is not implemented in the Encoder. + final List ransNx16ParamsFormatFlagList = Arrays.asList( RANSNx16Params.STRIPE_FLAG_MASK, - RANSNx16Params.STRIPE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK - ); + RANSNx16Params.STRIPE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); final List testCases = new ArrayList<>(); getInteropRANSTestFiles() .forEach(path -> @@ -93,14 +116,29 @@ public Object[][] getRANS4x16TestData() throws IOException { return testCases.toArray(new Object[][]{}); } + @DataProvider(name = "allRansCodecsAndDataForRoundtrip") + public Object[][] getAllRansCodecsForRoundTrip() throws IOException { + + // params: + // uncompressed testfile path, RANS encoder, RANS decoder, + // RANS params, compressed testfile directory name + + // Since, Stripe is not implemented in the Encoder, + // we don't test round tripping for the cases where Stripe Flag = 1 + return Stream.concat(Arrays.stream(getRANS4x8TestData()), Arrays.stream(getRANSNx16TestData())) + .toArray(Object[][]::new); + } + @DataProvider(name = "allRansCodecsAndData") public Object[][] getAllRansCodecs() throws IOException { - // concatenate RANS4x8 and RANSNx16 codecs and testdata - return Stream.concat(Arrays.stream(getRANS4x8TestData()), Arrays.stream(getRANS4x16TestData())) + + // params: + // uncompressed testfile path, RANS encoder, RANS decoder, + // RANS params, compressed testfile directory name + return Stream.concat(Arrays.stream(getAllRansCodecsForRoundTrip()), Arrays.stream(getRansNx16DecodeOnlyTestData())) .toArray(Object[][]::new); } - // TODO: testHtslibVersion should depend on SamtoolsTestUtilsTest.testSamtoolsVersion @Test(description = "Test if CRAM Interop Test Data is available") public void testGetHTSCodecsCorpus() { if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { @@ -111,7 +149,7 @@ public void testGetHTSCodecsCorpus() { @Test ( dependsOnMethods = "testGetHTSCodecsCorpus", - dataProvider = "allRansCodecsAndData", + dataProvider = "allRansCodecsAndDataForRoundtrip", description = "Roundtrip using htsjdk RANS. Compare the output with the original file" ) public void testRANSRoundTrip( final Path uncompressedInteropPath, @@ -125,13 +163,6 @@ public void testRANSRoundTrip( // by filtering out the embedded newlines, and then round trip through RANS and compare the // results final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - - // If Stripe Flag is set, skip the round trip test as encoding is not implemented for this case. - if ((params.getFormatFlags() & RANSNx16Params.STRIPE_FLAG_MASK)==0) { - final ByteBuffer compressedHtsjdkBytes = ransEncode.compress(uncompressedInteropBytes, params); - uncompressedInteropBytes.rewind(); - Assert.assertEquals(ransDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); - } } } @@ -170,7 +201,7 @@ public void testRANSPreCompressed( } } - // return a list of all RANS test data files in the InteropTest/RANS directory + // return a list of all RANS test data files in the htscodecs/tests directory private List getInteropRANSTestFiles() throws IOException { final List paths = new ArrayList<>(); Files.newDirectoryStream( @@ -184,7 +215,7 @@ private List getInteropRANSTestFiles() throws IOException { } // the input files have embedded newlines that the test remove before round-tripping... - final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { + private final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { // 1. filters new lines if any. // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. @@ -206,7 +237,7 @@ final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { } // Given a test file name, map it to the corresponding rans compressed path - final Path getCompressedRANSPath(final String ransType,final Path uncompressedInteropPath, RANSParams params) { + private final Path getCompressedRANSPath(final String ransType,final Path uncompressedInteropPath, RANSParams params) { // Example compressedFileName: r4x16/q4.193 // the substring after "." in the compressedFileName is the formatFlags (aka. the first byte of the compressed stream) diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java deleted file mode 100644 index 1d14b2f590..0000000000 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansOrder1DemoTest.java +++ /dev/null @@ -1,122 +0,0 @@ -package htsjdk.samtools.cram.compression.rans; - -import htsjdk.HtsjdkTest; -import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; -import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; -import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; -import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode; -import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode; -import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params; -import htsjdk.samtools.util.TestUtil; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.nio.ByteBuffer; -import java.util.Random; - -public class RansOrder1DemoTest extends HtsjdkTest { - private Random random = new Random(TestUtil.RANDOM_SEED); - - private static class TestDataEnvelope { - public final byte[] testArray; - public TestDataEnvelope(final byte[] testdata) { - this.testArray = testdata; - } - public String toString() { - return String.format("Array of size %d", testArray.length); - } - } - - - - @DataProvider(name="testDP") - public Object[][] getRansTestData() { - return new Object[][]{ - {new TestDataEnvelope(new byte[]{'h','e','e','e','e','l','l','l','o',})}, - { new TestDataEnvelope(new byte[] {0}) }, - {new TestDataEnvelope(new byte[]{2,101,108,3,2})}, -// - {new TestDataEnvelope(new byte[]{'h','e','e','e','e','e','e','e','e','e','e', - 'l','l','l','l','l','l','l','l','l','l','l','l','o',})}, - { new TestDataEnvelope(randomBytesFromGeometricDistribution(100, 0.1)) }, - { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01))}, // Small - { new TestDataEnvelope(randomBytesFromGeometricDistribution(100 * 1000 + 3, 0.01))} // Large - - }; - } - - private byte[] randomBytesFromGeometricDistribution(final int size, final double p) { - final byte[] data = new byte[size]; - for (int i = 0; i < data.length; i++) { - data[i] = drawByteFromGeometricDistribution(p); - } - return data; - } - - private byte drawByteFromGeometricDistribution(final double probability) { - final double rand = random.nextDouble(); - final double g = Math.ceil(Math.log(1 - rand) / Math.log(1 - probability)) - 1; - return (byte) g; - } - - - @Test(enabled = false,dataProvider = "testDP") - public void testRansNx16BuffersMeetBoundaryExpectations( - final TestDataEnvelope td) { - RANSEncode ransEncode = new RANSNx16Encode(); - RANSDecode ransDecode = new RANSNx16Decode(); -// RANSParams ransParams = new RANSNx16Params(0x40);// format = 64 (rle = 1, order = 0) -// RANSParams ransParams = new RANSNx16Params(0x41);// format = 65 (rle = 1, order = 1) - RANSParams ransParams = new RANSNx16Params(0x40);// format = 128 (pack = 1, order = 0) -// -// RANSParams ransParams = new RANSNx16Params(0x00); - -// // if we comment f++, t++ then this fails as expected with Buffer Underflow Exception -// // Next step -> make Xmax and dependent variables to long and try -// RANSEncode ransEncode = new RANS4x8Encode(); -// RANSDecode ransDecode = new RANS4x8Decode(); -// RANSParams ransParams = new RANS4x8Params(RANSParams.ORDER.ZERO); - - ByteBuffer inputData = ByteBuffer.wrap(td.testArray); - final ByteBuffer outBuffer = ransEncode.compress(inputData,ransParams); - - ByteBuffer uncompressed = ransDecode.uncompress(outBuffer); - // TODO: where is comp Flag -> freq first byte being written?? - inputData.rewind(); - Assert.assertEquals(inputData,uncompressed); - } - - - @Test(enabled = false,dataProvider = "testDP") - public void testRansNx16Tiny( - final TestDataEnvelope td) { - RANSEncode ransEncode = new RANSNx16Encode(); - RANSDecode ransDecode = new RANSNx16Decode(); - RANSParams ransParams = new RANSNx16Params(0x05); -// ByteBuffer inputData = ByteBuffer.wrap(td.testArray); -// -// final ByteBuffer outBuffer = ransEncode.compress(inputData,ransParams); -// -// ByteBuffer uncompressed = ransDecode.uncompress(outBuffer); -// inputData.rewind(); -// Assert.assertEquals(inputData,uncompressed); - final ByteBuffer in = ByteBuffer.wrap(td.testArray); - for (int size = 1; size < 100; size++) { - in.position(0); - in.limit(size); - final ByteBuffer compressed = ransEncode.compress(in, ransParams); - final ByteBuffer uncompressed = ransDecode.uncompress(compressed); - in.rewind(); - while (in.hasRemaining()) { - if (!uncompressed.hasRemaining()) { - Assert.fail("Premature end of uncompressed data."); - } - Assert.assertEquals(uncompressed.get(), in.get()); - } - Assert.assertFalse(uncompressed.hasRemaining()); - } - } - - -} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 9343f36158..2dde86ef4c 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -14,7 +14,9 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Random; import java.util.function.BiFunction; import java.util.stream.Stream; @@ -63,8 +65,9 @@ public Object[][] getRansTestData() { } public Object[][] getRansTestDataTinySmallLarge() { + + // params: test data, lower limit, upper limit return new Object[][]{ - // params: test data, lower limit, upper limit { new TestDataEnvelope(randomBytesFromGeometricDistribution(100, 0.1)), 1, 100 }, // Tiny { new TestDataEnvelope(randomBytesFromGeometricDistribution(1000, 0.01)), 4, 1000 }, // Small { new TestDataEnvelope(randomBytesFromGeometricDistribution(100 * 1000 + 3, 0.01)), 100 * 1000 + 3 - 4, 100 * 1000 + 3 } // Large @@ -73,87 +76,66 @@ public Object[][] getRansTestDataTinySmallLarge() { @DataProvider(name="rans4x8") public Object[][] getRans4x8Codecs() { - final RANS4x8Encode rans4x8Encode = new RANS4x8Encode(); - final RANS4x8Decode rans4x8Decode = new RANS4x8Decode(); + + // params: RANS encoder, RANS decoder, RANS params return new Object[][]{ - {rans4x8Encode, rans4x8Decode, new RANS4x8Params(RANSParams.ORDER.ZERO)}, // RANS4x8 Order 0 - {rans4x8Encode, rans4x8Decode, new RANS4x8Params(RANSParams.ORDER.ONE)} // RANS4x8 Order 1 + {new RANS4x8Encode(), new RANS4x8Decode(), new RANS4x8Params(RANSParams.ORDER.ZERO)}, + {new RANS4x8Encode(), new RANS4x8Decode(), new RANS4x8Params(RANSParams.ORDER.ONE)} }; } @DataProvider(name="ransNx16") public Object[][] getRansNx16Codecs() { - final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); - final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); - // TODO: More formatFlags values i.e, combinations of bit flags will be added later - return new Object[][]{ - - //RANSNx16 formatFlags(first byte) 0: Order 0, N = 4, CAT false - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x00)} , - - //RANSNx16 formatFlags(first byte) 1: Order 1, N = 4, CAT false - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x01)} , - - //RANSNx16 formatFlags(first byte) 4: Order 0, N = 32, CAT false - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x04)} , - - //RANSNx16 formatFlags(first byte) 5: Order 1, N = 32, CAT false - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x05)} , - - //RANSNx16 formatFlags(first byte) 32: Order 0, N = 4, CAT true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x20)} , - - //RANSNx16 formatFlags(first byte) 33: Order 1, N = 4, CAT true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x21)} , - - //RANSNx16 formatFlags(first byte) 36: Order 0, N = 32, CAT true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x24)} , - - //RANSNx16 formatFlags(first byte) 37: Order 1, N = 32, CAT true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x25)} , - - //RANSNx16 formatFlags(first byte) 64: Order 0, N = 4, CAT false, RLE = true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x40)} , - - //RANSNx16 formatFlags(first byte) 65: Order 1, N = 4, CAT false, RLE = true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x41)} , - //RANSNx16 formatFlags(first byte) 128: Order 0, N = 4, CAT false, RLE = false, Pack = true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x80)} , - - //RANSNx16 formatFlags(first byte) 129: Order 1, N = 4, CAT false, RLE = false, Pack = true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x81)} , - - //RANSNx16 formatFlags(first byte) 192: Order 0, N = 4, CAT false, RLE = true, Pack = true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0xC0)} , - - //RANSNx16 formatFlags(first byte) 193: Order 1, N = 4, CAT false, RLE = true, Pack = true - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0xC1)} , - - }; + // params: RANS encoder, RANS decoder, RANS params + final List ransNx16ParamsFormatFlagList = Arrays.asList( + 0x00, + RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.N32_FLAG_MASK, + RANSNx16Params.N32_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK, + RANSNx16Params.CAT_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.PACK_FLAG_MASK, + RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, + RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK + ); + final List testCases = new ArrayList<>(); + for (Integer ransNx16ParamsFormatFlag : ransNx16ParamsFormatFlagList) { + Object[] objects = new Object[]{ + new RANSNx16Encode(), + new RANSNx16Decode(), + new RANSNx16Params(ransNx16ParamsFormatFlag) + }; + testCases.add(objects); + } + return testCases.toArray(new Object[][]{}); } public Object[][] getRansNx16DecodeOnlyCodecs() { - final RANSNx16Encode ransNx16Encode = new RANSNx16Encode(); - final RANSNx16Decode ransNx16Decode = new RANSNx16Decode(); - return new Object[][]{ - //RANSNx16 formatFlags(first byte) 8: Order 0, N = 4, CAT false, RLE = false, Pack = false, Stripe = True - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x08)}, - - //RANSNx16 formatFlags(first byte) 9: Order 1, N = 4, CAT false, RLE = false, Pack = false, Stripe = True - {ransNx16Encode, ransNx16Decode, new RANSNx16Params(0x09)} + // params: RANS encoder, RANS decoder, RANS params + return new Object[][]{ + {new RANSNx16Encode(), new RANSNx16Decode(), new RANSNx16Params(RANSNx16Params.STRIPE_FLAG_MASK)}, + {new RANSNx16Encode(), new RANSNx16Decode(), new RANSNx16Params(RANSNx16Params.ORDER_FLAG_MASK|RANSNx16Params.STRIPE_FLAG_MASK)} }; } @DataProvider(name="RansNx16DecodeOnlyAndData") public Object[][] getRansNx16DecodeOnlyAndData() { - // this data provider provides all the testdata except empty input for RANS Nx16 codec + // params: RANS encoder, RANS decoder, RANS params, test data + // this data provider provides all the non-empty testdata input for RANS Nx16 codec return TestNGUtils.cartesianProduct(getRansNx16DecodeOnlyCodecs(), getRansTestData()); } public Object[][] getAllRansCodecs() { + + // params: RANSEncode, RANSDecode, RANSParams // concatenate RANS4x8 and RANSNx16 codecs return Stream.concat(Arrays.stream(getRans4x8Codecs()), Arrays.stream(getRansNx16Codecs())) .toArray(Object[][]::new); @@ -162,8 +144,8 @@ public Object[][] getAllRansCodecs() { @DataProvider(name="allRansAndData") public Object[][] getAllRansAndData() { + // params: RANSEncode, RANSDecode, RANSParams, test data // this data provider provides all the testdata for all of RANS codecs - // params: RANSEncode, RANSDecode, RANSParams, data return Stream.concat( Arrays.stream(TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansTestData())), Arrays.stream(TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansEmptyTestData()))) @@ -173,8 +155,8 @@ public Object[][] getAllRansAndData() { @DataProvider(name="allRansAndDataForTinySmallLarge") public Object[][] getAllRansAndDataForTinySmallLarge() { + // params: RANSEncode, RANSDecode, RANSParams, test data, lower limit, upper limit // this data provider provides Tiny, Small and Large testdata for all of RANS codecs - // params: RANSEncode, RANSDecode, RANSParams, data, lower limit, upper limit return TestNGUtils.cartesianProduct(getAllRansCodecs(), getRansTestDataTinySmallLarge()); } @@ -324,15 +306,7 @@ private static void ransRoundTrip( final ByteBuffer compressed = ransEncode.compress(data, params); final ByteBuffer uncompressed = ransDecode.uncompress(compressed); data.rewind(); -// Assert.assertEquals(data, uncompressed); - - while (data.hasRemaining()) { - if (!uncompressed.hasRemaining()) { - Assert.fail("Premature end of uncompressed data."); - } - Assert.assertEquals(uncompressed.get(), data.get()); - } - Assert.assertFalse(uncompressed.hasRemaining()); + Assert.assertEquals(data, uncompressed); } public ByteBuffer ransBufferMeetBoundaryExpectations( diff --git a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java index 64ea493cdd..eda5fcb46e 100644 --- a/src/test/java/htsjdk/utils/SamtoolsTestUtils.java +++ b/src/test/java/htsjdk/utils/SamtoolsTestUtils.java @@ -51,7 +51,7 @@ public static String getSamtoolsBin() { public static String getCRAMInteropData() { final String samtoolsPath = System.getenv(SAMTOOLS_BINARY_ENV_VARIABLE); - return samtoolsPath == null ? "../htscodecs/tests" : "./samtools-"+expectedHtslibVersion+ "/htslib-"+expectedHtslibVersion+"/htscodecs/tests"; + return samtoolsPath == null ? "../htscodecs/tests" : "./samtools-"+expectedSamtoolsVersion+ "/htslib-"+expectedHtslibVersion+"/htscodecs/tests"; } /** From e72147a76006ba0c5ab4f7150152717c05abcb18 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 22 Nov 2022 13:35:12 -0500 Subject: [PATCH 50/76] undo inadvertent deletion of RANSInterop roundtrip test logic --- src/test/java/htsjdk/samtools/cram/RANSInteropTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index b1c9d5a887..516464d7d8 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -163,6 +163,9 @@ public void testRANSRoundTrip( // by filtering out the embedded newlines, and then round trip through RANS and compare the // results final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer compressedHtsjdkBytes = ransEncode.compress(uncompressedInteropBytes, params); + uncompressedInteropBytes.rewind(); + Assert.assertEquals(ransDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); } } From 850280dc3f8722bf9138725adf51a412ed5799df Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 21 Mar 2023 14:29:11 -0400 Subject: [PATCH 51/76] debug - add decodePack and decodeRLE on top of CAT flag --- .../rans/ransnx16/RANSNx16Decode.java | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index a674c3758d..709a0d9530 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -17,10 +17,10 @@ public class RANSNx16Decode extends RANSDecode { private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; public ByteBuffer uncompress(final ByteBuffer inBuffer) { - return uncompressStream(inBuffer, 0); + return uncompress(inBuffer, 0); } - public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { + public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -44,12 +44,12 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { int packDataLength = 0; int numSymbols = 0; int[] packMappingTable = new int[0]; - if (ransNx16Params.isPack()){ + if (ransNx16Params.isPack()) { packDataLength = outSize; numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0), raise exception - if (numSymbols <= 16 && numSymbols!=0) { + if (numSymbols <= 16 && numSymbols != 0) { packMappingTable = new int[numSymbols]; for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get() & 0xFF; @@ -65,23 +65,23 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { int uncompressedRLEOutputLength = 0; final int[] rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; ByteBuffer uncompressedRLEMetaData = null; - if (ransNx16Params.isRLE()){ + if (ransNx16Params.isRLE()) { uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); uncompressedRLEOutputLength = outSize; outSize = Utils.readUint7(inBuffer); // TODO: maybe move decodeRLEMeta in-line - uncompressedRLEMetaData = decodeRLEMeta(inBuffer,ransNx16Params,uncompressedRLEMetaDataLength,rleSymbols); + uncompressedRLEMetaData = decodeRLEMeta(inBuffer, ransNx16Params, uncompressedRLEMetaDataLength, rleSymbols); } + ByteBuffer outBuffer = ByteBuffer.allocate(outSize); // If CAT is set then, the input is uncompressed - if (ransNx16Params.isCAT()){ + if (ransNx16Params.isCAT()) { byte[] data = new byte[outSize]; - inBuffer.get( data,0, outSize); - return ByteBuffer.wrap(data); - } - else { - ByteBuffer outBuffer = ByteBuffer.allocate(outSize); - if (outSize!=0) { + inBuffer.get(data, 0, outSize); + outBuffer = ByteBuffer.wrap(data); + } else { + outBuffer = ByteBuffer.allocate(outSize); + if (outSize != 0) { switch (ransNx16Params.getOrder()) { case ZERO: uncompressOrder0WayN(inBuffer, outBuffer, outSize, ransNx16Params); @@ -93,18 +93,18 @@ public ByteBuffer uncompressStream(final ByteBuffer inBuffer, int outSize) { throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); } } + } - // if rle, then decodeRLE - if (ransNx16Params.isRLE() && uncompressedRLEMetaData!=null ){ - outBuffer = decodeRLE(outBuffer,rleSymbols,uncompressedRLEMetaData, uncompressedRLEOutputLength); - } + // if rle, then decodeRLE + if (ransNx16Params.isRLE() && uncompressedRLEMetaData != null) { + outBuffer = decodeRLE(outBuffer, rleSymbols, uncompressedRLEMetaData, uncompressedRLEOutputLength); + } - // if pack, then decodePack - if (ransNx16Params.isPack() && packMappingTable.length > 0) { - outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); - } - return outBuffer; + // if pack, then decodePack + if (ransNx16Params.isPack() && packMappingTable.length > 0) { + outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } + return outBuffer; } private ByteBuffer uncompressOrder0WayN( @@ -466,7 +466,7 @@ private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ ulen[j]++; } - T[j] = uncompressStream(inBuffer, ulen[j]); + T[j] = uncompress(inBuffer, ulen[j]); } // Transpose From 3f84b2a1ed83664bd49e32313e141ea089525b23 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 21 Mar 2023 14:43:15 -0400 Subject: [PATCH 52/76] rewind outBuffer before it is returned --- .../samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java | 2 +- .../samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 0b6b4e1cad..f1df2a4972 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -123,7 +123,7 @@ private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuf break; } - outBuffer.position(0); + outBuffer.rewind(); return outBuffer; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 709a0d9530..a9e30d0d96 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -161,7 +161,7 @@ private ByteBuffer uncompressOrder0WayN( remSize --; reverseIndex ++; } - outBuffer.position(0); + outBuffer.rewind(); return outBuffer; } From 43145d4f43d13d6f3533b9c4fc2a9e78edf03d3b Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Tue, 21 Mar 2023 14:53:27 -0400 Subject: [PATCH 53/76] remove duplicate outBuffer creation --- .../samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index a9e30d0d96..9c1c0c0cae 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -80,7 +80,6 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { inBuffer.get(data, 0, outSize); outBuffer = ByteBuffer.wrap(data); } else { - outBuffer = ByteBuffer.allocate(outSize); if (outSize != 0) { switch (ransNx16Params.getOrder()) { case ZERO: From f7e6c57acffb32478031eb24c2bf6a751919c23e Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 19 Oct 2023 17:50:39 -0400 Subject: [PATCH 54/76] Addressing the feedback from oct 11, 2023 except implementing the Stripe Flag in RANS Nx16 encoder --- scripts/install-samtools.sh | 2 +- .../rans/ransnx16/RANSNx16Decode.java | 62 ++--- .../rans/ransnx16/RANSNx16Encode.java | 31 +-- .../htsjdk/samtools/cram/RANSInteropTest.java | 236 +++++++++--------- 4 files changed, 162 insertions(+), 169 deletions(-) diff --git a/scripts/install-samtools.sh b/scripts/install-samtools.sh index e847b9af79..97238f6d2f 100755 --- a/scripts/install-samtools.sh +++ b/scripts/install-samtools.sh @@ -1,6 +1,6 @@ #!/bin/sh set -ex wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1.14.tar.bz2 -# CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodes/tests/dat +# Note that the CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodecs/tests/dat tar -xjvf samtools-1.14.tar.bz2 cd samtools-1.14 && ./configure --prefix=/usr && make && sudo make install \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 9c1c0c0cae..21326af78e 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -5,7 +5,6 @@ import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol; -import htsjdk.samtools.cram.compression.rans.RANSParams; import htsjdk.samtools.cram.compression.rans.Utils; import java.nio.ByteBuffer; @@ -17,17 +16,17 @@ public class RANSNx16Decode extends RANSDecode { private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; public ByteBuffer uncompress(final ByteBuffer inBuffer) { + + // For RANS decoding, the bytes are read in little endian from the input stream + inBuffer.order(ByteOrder.LITTLE_ENDIAN); return uncompress(inBuffer, 0); } - public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { + private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } - // For RANS decoding, the bytes are read in little endian from the input stream - inBuffer.order(ByteOrder.LITTLE_ENDIAN); - // the first byte of compressed stream gives the formatFlags final int formatFlags = inBuffer.get() & 0xFF; final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); @@ -70,7 +69,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { uncompressedRLEOutputLength = outSize; outSize = Utils.readUint7(inBuffer); // TODO: maybe move decodeRLEMeta in-line - uncompressedRLEMetaData = decodeRLEMeta(inBuffer, ransNx16Params, uncompressedRLEMetaDataLength, rleSymbols); + uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols); } ByteBuffer outBuffer = ByteBuffer.allocate(outSize); @@ -86,7 +85,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { uncompressOrder0WayN(inBuffer, outBuffer, outSize, ransNx16Params); break; case ONE: - uncompressOrder1WayN(inBuffer, outBuffer, outSize, ransNx16Params); + uncompressOrder1WayN(inBuffer, outBuffer, ransNx16Params); break; default: throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); @@ -167,7 +166,6 @@ private ByteBuffer uncompressOrder0WayN( private ByteBuffer uncompressOrder1WayN( final ByteBuffer inBuffer, final ByteBuffer outBuffer, - final int outSize, final RANSNx16Params ransNx16Params) { initializeRANSDecoder(); @@ -286,7 +284,7 @@ private void readFrequencyTableOrder0( private void readFrequencyTableOrder1( final ByteBuffer cp, - int shift) { + final int shift) { final int[][] frequencies = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; final ArithmeticDecoder[] D = getD(); final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols(); @@ -349,7 +347,10 @@ private static int[] readAlphabet(final ByteBuffer cp){ return alphabet; } - private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ransParams, final int uncompressedRLEMetaDataLength, final int[] rleSymbols) { + private ByteBuffer decodeRLEMeta( + final ByteBuffer inBuffer, + final int uncompressedRLEMetaDataLength, + final int[] rleSymbols) { ByteBuffer uncompressedRLEMetaData; final int compressedRLEMetaDataLength; if ((uncompressedRLEMetaDataLength & 0x01)!=0) { @@ -370,7 +371,7 @@ private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ra int numRLESymbols = uncompressedRLEMetaData.get() & 0xFF; if (numRLESymbols == 0) { - numRLESymbols = 256; + numRLESymbols = Constants.NUMBER_OF_SYMBOLS; } for (int i = 0; i< numRLESymbols; i++) { rleSymbols[uncompressedRLEMetaData.get() & 0xFF] = 1; @@ -378,7 +379,11 @@ private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ra return uncompressedRLEMetaData; } - private ByteBuffer decodeRLE(ByteBuffer inBuffer , final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, int uncompressedRLEOutputLength) { + private ByteBuffer decodeRLE( + ByteBuffer inBuffer, + final int[] rleSymbols, + final ByteBuffer uncompressedRLEMetaData, + final int uncompressedRLEOutputLength) { ByteBuffer rleOutBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength); int j = 0; for(int i = 0; j< uncompressedRLEOutputLength; i++){ @@ -396,7 +401,11 @@ private ByteBuffer decodeRLE(ByteBuffer inBuffer , final int[] rleSymbols, final return inBuffer; } - private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMappingTable, int numSymbols, int uncompressedPackOutputLength) { + private ByteBuffer decodePack( + ByteBuffer inBuffer, + final int[] packMappingTable, + final int numSymbols, + final int uncompressedPackOutputLength) { ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); int j = 0; @@ -445,38 +454,35 @@ else if (numSymbols <= 16){ return inBuffer; } - private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){ - + private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ final int numInterleaveStreams = inBuffer.get() & 0xFF; // retrieve lengths of compressed interleaved streams - int[] clen = new int[numInterleaveStreams]; + final int[] compressedLengths = new int[numInterleaveStreams]; for ( int j=0; j j){ - ulen[j]++; + uncompressedLengths[j]++; } - T[j] = uncompress(inBuffer, ulen[j]); + TransposedData[j] = uncompress(inBuffer, uncompressedLengths[j]); } // Transpose - ByteBuffer out = ByteBuffer.allocate(outSize); + final ByteBuffer outBuffer = ByteBuffer.allocate(outSize); for (int j = 0; j 0) { + if (runCounts[i]>0) { numRLESymbols++; } } @@ -540,7 +536,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar if (numRLESymbols==0) { // Format cannot cope with zero RLE symbols, so pick one! numRLESymbols = 1; - rleSymbols[0] = 1; + runCounts[0] = 1; } // create rleMetaData buffer to store rle metadata. @@ -548,11 +544,11 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar // TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize ByteBuffer rleMetaData = ByteBuffer.allocate(numRLESymbols+1+inputSize); // rleMetaData - // write number of symbols that are run length encoded to the outBuffer + // write number of symbols that are run length encoded rleMetaData.put((byte) numRLESymbols); - for (int i=0; i<256; i++){ - if (rleSymbols[i] >0){ + for (int i=0; i0){ // write the symbols that are run length encoded rleMetaData.put((byte) i); } @@ -566,7 +562,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar for (int i = 0; i < inputSize; i++) { encodedData.put(encodedDataIdx++,inBuffer.get(i)); - if (rleSymbols[inBuffer.get(i)&0xFF]>0) { + if (runCounts[inBuffer.get(i)&0xFF]>0) { lastSymbol = inBuffer.get(i) & 0xFF; int run = 0; @@ -585,7 +581,6 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar encodedData.limit(encodedDataIdx); // limit and rewind - // TODO: check if position of rleMetadata is at the end of the buffer as expected rleMetaData.limit(rleMetaData.position()); rleMetaData.rewind(); diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index 516464d7d8..906d8dd45b 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -33,7 +33,7 @@ * with the htslib implementations. The test files for Interop tests is kept in a separate repository, * currently at https://github.com/samtools/htscodecs so it can be shared across htslib/samtools/htsjdk. * - * For native development env, the Interop test files are downloaded locally and made available at "../htscodecs/tests" + * For local development env, the Interop test files must be downloaded locally and made available at "../htscodecs/tests" * For CI env, the Interop test files are made available from the existing samtools installation * at "/samtools-1.14/htslib-1.14/htscodecs/tests" */ @@ -42,105 +42,57 @@ public class RANSInteropTest extends HtsjdkTest { public static final String COMPRESSED_RANSNX16_DIR = "r4x16"; // RANS4x8 codecs and testdata - public Object[][] getRANS4x8TestData() throws IOException { + public Object[][] get4x8TestCases() throws IOException { // params: - // uncompressed testfile path, RANS encoder, RANS decoder, - // RANS params, compressed testfile directory name - final List rans4x8ParamsOrderList = Arrays.asList( - RANSParams.ORDER.ZERO, - RANSParams.ORDER.ONE); + // compressed testfile path, uncompressed testfile path, + // RANS encoder, RANS decoder, RANS params final List testCases = new ArrayList<>(); - getInteropRANSTestFiles() - .forEach(path -> - rans4x8ParamsOrderList.stream().map(rans4x8ParamsOrder -> new Object[]{ - path, - new RANS4x8Encode(), - new RANS4x8Decode(), - new RANS4x8Params(rans4x8ParamsOrder), - COMPRESSED_RANS4X8_DIR - }).forEach(testCases::add)); + for (Path path : getInteropRansCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { + Object[] objects = new Object[]{ + path, + getRansUnCompressedFilePath(path), + new RANS4x8Encode(), + new RANS4x8Decode(), + getRans4x8Params(path) + }; + testCases.add(objects); + } return testCases.toArray(new Object[][]{}); } // RANSNx16 codecs and testdata - public Object[][] getRANSNx16TestData() throws IOException { + public Object[][] getNx16TestCases() throws IOException { // params: - // uncompressed testfile path, RANS encoder, RANS decoder, - // RANS params, compressed testfile directory name - final List ransNx16ParamsFormatFlagList = Arrays.asList( - 0x00, - RANSNx16Params.ORDER_FLAG_MASK, - RANSNx16Params.RLE_FLAG_MASK, - RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, - RANSNx16Params.N32_FLAG_MASK, - RANSNx16Params.N32_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, - RANSNx16Params.PACK_FLAG_MASK, - RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, - RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, - RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); + // compressed testfile path, uncompressed testfile path, + // RANS encoder, RANS decoder, RANS params final List testCases = new ArrayList<>(); - getInteropRANSTestFiles() - .forEach(path -> - ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ - path, - new RANSNx16Encode(), - new RANSNx16Decode(), - new RANSNx16Params(ransNx16ParamsFormatFlag), - COMPRESSED_RANSNX16_DIR - }).forEach(testCases::add)); - return testCases.toArray(new Object[][]{}); - } - - public Object[][] getRansNx16DecodeOnlyTestData() throws IOException { - - // params: - // uncompressed testfile path, RANS encoder, RANS decoder, - // RANS params, compressed testfile directory name - - // Stripe is implemented in the Decoder. It is not implemented in the Encoder. - final List ransNx16ParamsFormatFlagList = Arrays.asList( - RANSNx16Params.STRIPE_FLAG_MASK, - RANSNx16Params.STRIPE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); - final List testCases = new ArrayList<>(); - getInteropRANSTestFiles() - .forEach(path -> - ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ - path, - new RANSNx16Encode(), - new RANSNx16Decode(), - new RANSNx16Params(ransNx16ParamsFormatFlag), - COMPRESSED_RANSNX16_DIR - }).forEach(testCases::add)); + for (Path path : getInteropRansCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { + Object[] objects = new Object[]{ + path, + getRansUnCompressedFilePath(path), + new RANSNx16Encode(), + new RANSNx16Decode(), + getRansNx16Params(path) + }; + testCases.add(objects); + } return testCases.toArray(new Object[][]{}); } - @DataProvider(name = "allRansCodecsAndDataForRoundtrip") - public Object[][] getAllRansCodecsForRoundTrip() throws IOException { - - // params: - // uncompressed testfile path, RANS encoder, RANS decoder, - // RANS params, compressed testfile directory name - - // Since, Stripe is not implemented in the Encoder, - // we don't test round tripping for the cases where Stripe Flag = 1 - return Stream.concat(Arrays.stream(getRANS4x8TestData()), Arrays.stream(getRANSNx16TestData())) - .toArray(Object[][]::new); - } - - @DataProvider(name = "allRansCodecsAndData") - public Object[][] getAllRansCodecs() throws IOException { + @DataProvider(name = "roundTripTestCases") + public Object[][] getRoundTripTestCases() throws IOException { // params: - // uncompressed testfile path, RANS encoder, RANS decoder, - // RANS params, compressed testfile directory name - return Stream.concat(Arrays.stream(getAllRansCodecsForRoundTrip()), Arrays.stream(getRansNx16DecodeOnlyTestData())) + // compressed testfile path, uncompressed testfile path, + // RANS encoder, RANS decoder, RANS params + return Stream.concat(Arrays.stream(get4x8TestCases()), Arrays.stream(getNx16TestCases())) .toArray(Object[][]::new); } @Test(description = "Test if CRAM Interop Test Data is available") - public void testGetHTSCodecsCorpus() { + public void testHtsCodecsCorpusIsAvailable() { if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { throw new SkipException(String.format("RANS Interop Test Data is not available at %s", CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH)); @@ -148,49 +100,55 @@ public void testGetHTSCodecsCorpus() { } @Test ( - dependsOnMethods = "testGetHTSCodecsCorpus", - dataProvider = "allRansCodecsAndDataForRoundtrip", + dependsOnMethods = "testHtsCodecsCorpusIsAvailable", + dataProvider = "roundTripTestCases", description = "Roundtrip using htsjdk RANS. Compare the output with the original file" ) public void testRANSRoundTrip( - final Path uncompressedInteropPath, + final Path unusedcompressedFilePath, + final Path uncompressedFilePath, final RANSEncode ransEncode, final RANSDecode ransDecode, - final RANSParams params, - final String unusedCompressedDirname) throws IOException { - try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath)) { + final RANSParams params) throws IOException { + try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedFilePath)) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - final ByteBuffer compressedHtsjdkBytes = ransEncode.compress(uncompressedInteropBytes, params); - uncompressedInteropBytes.rewind(); - Assert.assertEquals(ransDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); + + // Stripe Flag is not implemented in RANSNx16 Encoder. + // The encoder throws CRAMException if Stripe Flag is used. + if (params instanceof RANSNx16Params){ + RANSNx16Params ransNx16Params = (RANSNx16Params) params; + if (ransNx16Params.isStripe()) { + Assert.assertThrows(CRAMException.class, () -> ransEncode.compress(uncompressedInteropBytes, params)); + } + } else { + final ByteBuffer compressedHtsjdkBytes = ransEncode.compress(uncompressedInteropBytes, params); + uncompressedInteropBytes.rewind(); + Assert.assertEquals(ransDecode.uncompress(compressedHtsjdkBytes), uncompressedInteropBytes); + } } } @Test ( - dependsOnMethods = "testGetHTSCodecsCorpus", - dataProvider = "allRansCodecsAndData", - description = "Compress the original file using htsjdk RANS and compare it with the existing compressed file. " + - "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.") - public void testRANSPreCompressed( + dependsOnMethods = "testHtsCodecsCorpusIsAvailable", + dataProvider = "roundTripTestCases", + description = "Uncompress the existing compressed file using htsjdk RANS and compare it with the original file.") + public void testDecodeOnly( + final Path compressedFilePath, final Path uncompressedInteropPath, - final RANSEncode unused, + final RANSEncode unusedRansEncode, final RANSDecode ransDecode, - final RANSParams params, - final String compressedInteropDirName) throws IOException { - - final Path preCompressedInteropPath = getCompressedRANSPath(compressedInteropDirName,uncompressedInteropPath, params); - + final RANSParams unusedRansParams) throws IOException { try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); - final InputStream preCompressedInteropStream = Files.newInputStream(preCompressedInteropPath) + final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { + // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo @@ -204,19 +162,6 @@ public void testRANSPreCompressed( } } - // return a list of all RANS test data files in the htscodecs/tests directory - private List getInteropRANSTestFiles() throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat"), - path -> path.getFileName().startsWith("q4") || - path.getFileName().startsWith("q8") || - path.getFileName().startsWith("qvar") || - path.getFileName().startsWith("q40+dir")) - .forEach(path -> paths.add(path)); - return paths; - } - // the input files have embedded newlines that the test remove before round-tripping... private final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { // 1. filters new lines if any. @@ -239,13 +184,60 @@ private final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOExce } } - // Given a test file name, map it to the corresponding rans compressed path - private final Path getCompressedRANSPath(final String ransType,final Path uncompressedInteropPath, RANSParams params) { + // return a list of all encoded test data files in the htscodecs/tests/dat/ directory + private List getInteropRansCompressedFilePaths(final String compressedDir) throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat/"+compressedDir), + path -> Files.isRegularFile(path)) + .forEach(path -> paths.add(path)); + return paths; + } + + // Given a compressed test file path, return the corresponding uncompressed file path + public static final Path getRansUnCompressedFilePath(final Path compressedInteropPath) { + String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); + // Example compressedInteropPath: ../dat/r4x8/q4.1 => unCompressedFilePath: ../dat/q4 + return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName); + } + + public static final String getUncompressedFileName(final String compressedFileName) { + // Returns original filename from compressed file name + int lastDotIndex = compressedFileName.lastIndexOf("."); + if (lastDotIndex >= 0) { + String fileName = compressedFileName.substring(0, lastDotIndex); + return fileName; + } else { + throw new CRAMException("The format of the compressed File Name is not as expected. " + + "The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ compressedFileName); + } + } - // Example compressedFileName: r4x16/q4.193 - // the substring after "." in the compressedFileName is the formatFlags (aka. the first byte of the compressed stream) - final String compressedFileName = String.format("%s/%s.%s", ransType, uncompressedInteropPath.getFileName(), params.getFormatFlags()); - return uncompressedInteropPath.getParent().resolve(compressedFileName); + public static final RANSParams getRans4x8Params(final Path compressedInteropPath){ + // Returns RANSParams from compressed file path + final String compressedFileName = compressedInteropPath.getFileName().toString(); + final int lastDotIndex = compressedFileName.lastIndexOf("."); + if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { + return new RANS4x8Params(RANSParams.ORDER.fromInt(Integer.parseInt(compressedFileName.substring(lastDotIndex + 1)))); + } else { + throw new CRAMException("The format of the compressed File Name is not as expected. " + + "The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ compressedFileName); + } + } + + public static final RANSParams getRansNx16Params(final Path compressedInteropPath){ + // Returns RANSParams from compressed file path + final String compressedFileName = compressedInteropPath.getFileName().toString(); + final int lastDotIndex = compressedFileName.lastIndexOf("."); + if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { + return new RANSNx16Params(Integer.parseInt(compressedFileName.substring(lastDotIndex + 1))); + } else { + throw new CRAMException("The format of the compressed File Name is not as expected. " + + "The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ compressedFileName); + } } } \ No newline at end of file From f9041e8df77acff5f5f77e11adb350626090374e Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Thu, 26 Oct 2023 14:26:02 -0400 Subject: [PATCH 55/76] Move common methods to CRAMInteropTestUtils class --- .../samtools/cram/CRAMInteropTestUtils.java | 70 ++++++++++++ .../htsjdk/samtools/cram/RANSInteropTest.java | 105 +++--------------- 2 files changed, 84 insertions(+), 91 deletions(-) diff --git a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index 046622c84d..bcc7b9ce96 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -1,8 +1,13 @@ package htsjdk.samtools.cram; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + import htsjdk.utils.SamtoolsTestUtils; /** @@ -28,4 +33,69 @@ public static Path getInteropTestDataLocation() { return Paths.get(INTEROP_TEST_FILES_PATH); } + // the input files have embedded newlines that the test remove before round-tripping... + protected static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { + // 1. filters new lines if any. + // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. + // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + int skip = 0; + for (final byte b : rawBytes) { + if (b == '\t'){ + skip = 1; + } + if (b == '\n') { + skip = 0; + } + if (skip == 0 && b !='\n') { + baos.write(b); + } + } + return baos.toByteArray(); + } + } + + // return a list of all encoded test data files in the htscodecs/tests/dat/ directory + protected static List getInteropCompressedFilePaths(final String compressedDir) throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat/"+compressedDir), + path -> Files.isRegularFile(path)) + .forEach(path -> paths.add(path)); + return paths; + } + + // Given a compressed test file path, return the corresponding uncompressed file path + protected static final Path getUnCompressedFilePath(final Path compressedInteropPath) { + String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); + // Example compressedInteropPath: ../dat/r4x8/q4.1 => unCompressedFilePath: ../dat/q4 + return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName); + } + + private static final String getUncompressedFileName(final String compressedFileName) { + // Returns original filename from compressed file name + int lastDotIndex = compressedFileName.lastIndexOf("."); + if (lastDotIndex >= 0) { + String fileName = compressedFileName.substring(0, lastDotIndex); + return fileName; + } else { + throw new CRAMException("The format of the compressed File Name is not as expected. " + + "The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ compressedFileName); + } + } + + protected static final int getParamsFormatFlags(final Path compressedInteropPath){ + // Returns formatFlags from compressed file path + final String compressedFileName = compressedInteropPath.getFileName().toString(); + final int lastDotIndex = compressedFileName.lastIndexOf("."); + if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { + return Integer.parseInt(compressedFileName.substring(lastDotIndex + 1)); + } else { + throw new CRAMException("The format of the compressed File Name is not as expected. " + + "The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ compressedFileName); + } + } + } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index 906d8dd45b..0b1b428048 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -16,7 +16,6 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -28,6 +27,11 @@ import java.util.List; import java.util.stream.Stream; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.filterEmbeddedNewlines; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.getInteropCompressedFilePaths; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.getParamsFormatFlags; +import static htsjdk.samtools.cram.CRAMInteropTestUtils.getUnCompressedFilePath; + /** * RANSInteropTest tests if the htsjdk RANS4x8 and RANSNx16 implementations are interoperable * with the htslib implementations. The test files for Interop tests is kept in a separate repository, @@ -48,13 +52,13 @@ public Object[][] get4x8TestCases() throws IOException { // compressed testfile path, uncompressed testfile path, // RANS encoder, RANS decoder, RANS params final List testCases = new ArrayList<>(); - for (Path path : getInteropRansCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { + for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { Object[] objects = new Object[]{ path, - getRansUnCompressedFilePath(path), + getUnCompressedFilePath(path), new RANS4x8Encode(), new RANS4x8Decode(), - getRans4x8Params(path) + new RANS4x8Params(RANSParams.ORDER.fromInt(getParamsFormatFlags(path))) }; testCases.add(objects); } @@ -68,13 +72,13 @@ public Object[][] getNx16TestCases() throws IOException { // compressed testfile path, uncompressed testfile path, // RANS encoder, RANS decoder, RANS params final List testCases = new ArrayList<>(); - for (Path path : getInteropRansCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { + for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { Object[] objects = new Object[]{ path, - getRansUnCompressedFilePath(path), + getUnCompressedFilePath(path), new RANSNx16Encode(), new RANSNx16Decode(), - getRansNx16Params(path) + new RANSNx16Params(getParamsFormatFlags(path)) }; testCases.add(objects); } @@ -118,11 +122,8 @@ public void testRANSRoundTrip( // Stripe Flag is not implemented in RANSNx16 Encoder. // The encoder throws CRAMException if Stripe Flag is used. - if (params instanceof RANSNx16Params){ - RANSNx16Params ransNx16Params = (RANSNx16Params) params; - if (ransNx16Params.isStripe()) { - Assert.assertThrows(CRAMException.class, () -> ransEncode.compress(uncompressedInteropBytes, params)); - } + if (params instanceof RANSNx16Params && ((RANSNx16Params) params).isStripe()) { + Assert.assertThrows(CRAMException.class, () -> ransEncode.compress(uncompressedInteropBytes, params)); } else { final ByteBuffer compressedHtsjdkBytes = ransEncode.compress(uncompressedInteropBytes, params); uncompressedInteropBytes.rewind(); @@ -157,87 +158,9 @@ public void testDecodeOnly( // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); } catch (final NoSuchFileException ex){ - throw new SkipException("Skipping testRANSPrecompressed as either input file " + + throw new SkipException("Skipping testDecodeOnly as either input file " + "or precompressed file is missing.", ex); } } - // the input files have embedded newlines that the test remove before round-tripping... - private final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException { - // 1. filters new lines if any. - // 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag. - // This file is also new-line separated. The extra column, '\t' and '\n' are filtered. - try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - int skip = 0; - for (final byte b : rawBytes) { - if (b == '\t'){ - skip = 1; - } - if (b == '\n') { - skip = 0; - } - if (skip == 0 && b !='\n') { - baos.write(b); - } - } - return baos.toByteArray(); - } - } - - // return a list of all encoded test data files in the htscodecs/tests/dat/ directory - private List getInteropRansCompressedFilePaths(final String compressedDir) throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat/"+compressedDir), - path -> Files.isRegularFile(path)) - .forEach(path -> paths.add(path)); - return paths; - } - - // Given a compressed test file path, return the corresponding uncompressed file path - public static final Path getRansUnCompressedFilePath(final Path compressedInteropPath) { - String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); - // Example compressedInteropPath: ../dat/r4x8/q4.1 => unCompressedFilePath: ../dat/q4 - return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName); - } - - public static final String getUncompressedFileName(final String compressedFileName) { - // Returns original filename from compressed file name - int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0) { - String fileName = compressedFileName.substring(0, lastDotIndex); - return fileName; - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } - } - - public static final RANSParams getRans4x8Params(final Path compressedInteropPath){ - // Returns RANSParams from compressed file path - final String compressedFileName = compressedInteropPath.getFileName().toString(); - final int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { - return new RANS4x8Params(RANSParams.ORDER.fromInt(Integer.parseInt(compressedFileName.substring(lastDotIndex + 1)))); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } - } - - public static final RANSParams getRansNx16Params(final Path compressedInteropPath){ - // Returns RANSParams from compressed file path - final String compressedFileName = compressedInteropPath.getFileName().toString(); - final int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0 && lastDotIndex < compressedFileName.length() - 1) { - return new RANSNx16Params(Integer.parseInt(compressedFileName.substring(lastDotIndex + 1))); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } - } - } \ No newline at end of file From 7126507be43ba2f2ee1d159665c3ab116f155da7 Mon Sep 17 00:00:00 2001 From: yash-puligundla Date: Fri, 1 Dec 2023 15:47:30 -0500 Subject: [PATCH 56/76] Addressing the feedback from Nov 7 and Nov 20 - part 1 --- .../compression/rans/ArithmeticDecoder.java | 2 +- .../compression/rans/RANSDecodingSymbol.java | 27 ++-- .../cram/compression/rans/RANSEncode.java | 3 +- .../compression/rans/RANSEncodingSymbol.java | 47 +++---- .../samtools/cram/compression/rans/Utils.java | 55 +++----- .../rans/rans4x8/RANS4x8Decode.java | 28 ++-- .../rans/rans4x8/RANS4x8Encode.java | 34 ++--- .../rans/rans4x8/RANS4x8Params.java | 2 +- .../rans/ransnx16/RANSNx16Decode.java | 95 ++++++------- .../rans/ransnx16/RANSNx16Encode.java | 131 ++++++++---------- .../rans/ransnx16/RANSNx16Params.java | 2 +- .../samtools/cram/CRAMInteropTestUtils.java | 7 +- .../htsjdk/samtools/cram/RANSInteropTest.java | 21 ++- .../cram/compression/rans/RansTest.java | 55 ++++---- .../samtools/cram/InteropTest/RANS/qvar | 100 ------------- .../cram/InteropTest/RANS/r4x16/qvar.0 | Bin 32987 -> 0 bytes .../cram/InteropTest/RANS/r4x8/qvar.0 | Bin 32997 -> 0 bytes 17 files changed, 229 insertions(+), 380 deletions(-) delete mode 100644 src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar delete mode 100644 src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x16/qvar.0 delete mode 100644 src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x8/qvar.0 diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java index e8c128a6e2..9ae5512892 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ArithmeticDecoder.java @@ -29,7 +29,7 @@ final public class ArithmeticDecoder { public final int[] cumulativeFreq = new int[Constants.NUMBER_OF_SYMBOLS]; // reverse lookup table - public byte[] reverseLookup = new byte[Constants.TOTAL_FREQ]; + public final byte[] reverseLookup = new byte[Constants.TOTAL_FREQ]; public ArithmeticDecoder() { for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java index 9920209ee9..34d0bc7dda 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSDecodingSymbol.java @@ -56,35 +56,30 @@ public long advanceSymbol4x8(final long rIn, final ByteBuffer byteBuffer, final final int mask = (1 << scaleBits) - 1; // s, x = D(x) - long r = rIn; - r = freq * (r >> scaleBits) + (r & mask) - start; + long ret = freq * (rIn >> scaleBits) + (rIn & mask) - start; // re-normalize - if (r < Constants.RANS_4x8_LOWER_BOUND) { + if (ret < Constants.RANS_4x8_LOWER_BOUND) { do { final int b = 0xFF & byteBuffer.get(); - r = (r << 8) | b; - } while (r < Constants.RANS_4x8_LOWER_BOUND); + ret = (ret << 8) | b; + } while (ret < Constants.RANS_4x8_LOWER_BOUND); } - - return r; + return ret; } public long advanceSymbolNx16(final long rIn, final ByteBuffer byteBuffer, final int scaleBits) { final int mask = (1 << scaleBits) - 1; // s, x = D(x) - long r = rIn; - r = freq * (r >> scaleBits) + (r & mask) - start; + long ret = freq * (rIn >> scaleBits) + (rIn & mask) - start; // re-normalize - if (r < (Constants.RANS_Nx16_LOWER_BOUND)){ - int i = 0xFF & byteBuffer.get(); - i |= (0xFF & byteBuffer.get())<<8; - r = (r << 16) + i; + if (ret < (Constants.RANS_Nx16_LOWER_BOUND)){ + final int i = (0xFF & byteBuffer.get()) | ((0xFF & byteBuffer.get()) << 8); + ret = (ret << 16) + i; } - - return r; + return ret; } -} +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java index 43bf5ad46a..6f21539500 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncode.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.compression.rans; +import htsjdk.samtools.cram.CRAMException; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -40,7 +41,7 @@ protected ByteBuffer allocateOutputBuffer(final int inSize) { final int compressedSize = (int) (1.05 * inSize + 257 * 257 * 3 + 9); final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize); if (outputBuffer.remaining() < compressedSize) { - throw new RuntimeException("Failed to allocate sufficient buffer size for RANS coder."); + throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); } outputBuffer.order(ByteOrder.LITTLE_ENDIAN); return outputBuffer; diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java index ea4117cda6..8188d1a825 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/RANSEncodingSymbol.java @@ -67,18 +67,17 @@ public void set(final int start, final int freq, final int scaleBits) { rcpShift += 32; // Avoid the extra >>32 in RansEncPutSymbol } - public long putSymbol4x8(long r, final ByteBuffer byteBuffer) { + public long putSymbol4x8(final long r, final ByteBuffer byteBuffer) { ValidationUtils.validateArg(xMax != 0, "can't encode symbol with freq=0"); // re-normalize - long x = r; - // TODO: x should also be long if there is a case where x could be greater than xMax - if (x >= xMax) { - byteBuffer.put((byte) (x & 0xFF)); - x >>= 8; - if (x >= xMax) { - byteBuffer.put((byte) (x & 0xFF)); - x >>= 8; + long retSymbol = r; + if (retSymbol >= xMax) { + byteBuffer.put((byte) (retSymbol & 0xFF)); + retSymbol >>= 8; + if (retSymbol >= xMax) { + byteBuffer.put((byte) (retSymbol & 0xFF)); + retSymbol >>= 8; } } @@ -89,24 +88,23 @@ public long putSymbol4x8(long r, final ByteBuffer byteBuffer) { // int q = (int) (((uint64_t)x * sym.rcp_freq) >> 32) >> sym.rcp_shift; // The extra >>32 has already been added to RansEncSymbolInit - final long q = ((x * (0xFFFFFFFFL & rcpFreq)) >> rcpShift); - r = x + bias + q * cmplFreq; - return r; + final long q = ((retSymbol * (0xFFFFFFFFL & rcpFreq)) >> rcpShift); + return retSymbol + bias + q * cmplFreq; } - public long putSymbolNx16(long r, final ByteBuffer byteBuffer) { + public long putSymbolNx16(final long r, final ByteBuffer byteBuffer) { ValidationUtils.validateArg(xMax != 0, "can't encode symbol with freq=0"); // re-normalize - long x = r; - if (x >= xMax) { - byteBuffer.put((byte) ((x>>8) & 0xFF)); // extra line - 1 more byte - byteBuffer.put((byte) (x & 0xFF)); - x >>=16; - if (x >= xMax) { - byteBuffer.put((byte) ((x>>8) & 0xFF)); // extra line - 1 more byte - byteBuffer.put((byte) (x & 0xFF)); - x >>=16; + long retSymbol = r; + if (retSymbol >= xMax) { + byteBuffer.put((byte) ((retSymbol>>8) & 0xFF)); // extra line - 1 more byte + byteBuffer.put((byte) (retSymbol & 0xFF)); + retSymbol >>=16; + if (retSymbol >= xMax) { + byteBuffer.put((byte) ((retSymbol>>8) & 0xFF)); // extra line - 1 more byte + byteBuffer.put((byte) (retSymbol & 0xFF)); + retSymbol >>=16; } } @@ -117,8 +115,7 @@ public long putSymbolNx16(long r, final ByteBuffer byteBuffer) { // int q = (int) (((uint64_t)x * sym.rcp_freq) >> 32) >> sym.rcp_shift; // The extra >>32 has already been added to RansEncSymbolInit - final long q = ((x * (0xFFFFFFFFL & rcpFreq)) >> rcpShift); - r = (x + bias + q * cmplFreq); - return r; + final long q = ((retSymbol * (0xFFFFFFFFL & rcpFreq)) >> rcpShift); + return retSymbol + bias + q * cmplFreq; } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index 4fc09bedfc..e4160a1612 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -10,9 +10,8 @@ private static void reverse(final byte[] array, final int offset, final int size } int i = offset; int j = offset + size - 1; - byte tmp; while (j > i) { - tmp = array[j]; + byte tmp = array[j]; array[j] = array[i]; array[i] = tmp; j--; @@ -21,14 +20,12 @@ private static void reverse(final byte[] array, final int offset, final int size } public static void reverse(final ByteBuffer byteBuffer) { - byte tmp; if (byteBuffer.hasArray()) { reverse(byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit()); } else { for (int i = 0; i < byteBuffer.limit(); i++) { - tmp = byteBuffer.get(i); byteBuffer.put(i, byteBuffer.get(byteBuffer.limit() - i - 1)); - byteBuffer.put(byteBuffer.limit() - i - 1, tmp); + byteBuffer.put(byteBuffer.limit() - i - 1, byteBuffer.get(i)); } } } @@ -38,31 +35,24 @@ public static int RANSGetCumulativeFrequency(final long r, final int scaleBits) return (int) (r & ((1 << scaleBits) - 1)); // since cumulative frequency will be a maximum of 4096 } - // Re-normalize. - public static long RANSDecodeRenormalize4x8(long r, final ByteBuffer byteBuffer) { - - //rans4x8 - // TODO: replace if - do - while with while - if (r < Constants.RANS_4x8_LOWER_BOUND) { - do { - r = (r << 8) | (0xFF & byteBuffer.get()); - } while (r < Constants.RANS_4x8_LOWER_BOUND); + public static long RANSDecodeRenormalize4x8(final long r, final ByteBuffer byteBuffer) { + long ret = r; + while (ret < Constants.RANS_4x8_LOWER_BOUND) { + ret = (ret << 8) | (0xFF & byteBuffer.get()); } - return r; + return ret; } - public static long RANSDecodeRenormalizeNx16(long r, final ByteBuffer byteBuffer) { - // ransNx16 - if (r < (Constants.RANS_Nx16_LOWER_BOUND)) { - int i = (0xFF & byteBuffer.get()); - i |= (0xFF & byteBuffer.get()) << 8; - - r = (r << 16) | i; + public static long RANSDecodeRenormalizeNx16(final long r, final ByteBuffer byteBuffer) { + long ret = r; + if (ret < (Constants.RANS_Nx16_LOWER_BOUND)) { + final int i = (0xFF & byteBuffer.get()) | ((0xFF & byteBuffer.get()) << 8); + ret = (ret << 16) | i; } - return r; + return ret; } - public static void writeUint7(int i, ByteBuffer cp) { + public static void writeUint7(final int i, final ByteBuffer cp) { int s = 0; int X = i; do { @@ -72,12 +62,12 @@ public static void writeUint7(int i, ByteBuffer cp) { do { s -= 7; //writeByte - int s_ = (s > 0) ? 1 : 0; + final int s_ = (s > 0) ? 1 : 0; cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); } while (s > 0); } - public static int readUint7(ByteBuffer cp) { + public static int readUint7(final ByteBuffer cp) { int i = 0; int c; do { @@ -101,10 +91,6 @@ public static void normaliseFrequenciesOrder0(final int[] F, final int bits) { // Scale total of frequencies to max final int renormFreq = 1 << bits; - // To avoid division by 0 error, if T=0, set tr = 0. - // when T=0 i.e, when all symbol frequencies are 0, tr is not used anyway. - final long tr = (T>0)?(((long) (renormFreq) << 31) / T + (1 << 30) / T):0; - // keep track of the symbol that has the maximum frequency // in the input Frequency array. // This symbol's frequency might be altered at the end to make sure @@ -117,6 +103,10 @@ public static void normaliseFrequenciesOrder0(final int[] F, final int bits) { M = symbol; } } + + // To avoid division by 0 error, if T=0, set tr = 0. + // when T=0 i.e, when all symbol frequencies are 0, tr is not used anyway. + final long tr = (T>0)?(((long) (renormFreq) << 31) / T + (1 << 30) / T):0; int fsum = 0; for (int symbol = 0; symbol < Constants.NUMBER_OF_SYMBOLS; symbol++) { if (F[symbol] == 0) { @@ -150,10 +140,9 @@ public static void normaliseFrequenciesOrder1(final int[][] F, final int shift) if (F[Constants.NUMBER_OF_SYMBOLS][j]==0){ continue; } - int bitSize = shift; // log2 N = Math.log(N)/Math.log(2) - bitSize = (int) Math.ceil(Math.log(F[Constants.NUMBER_OF_SYMBOLS][j]) / Math.log(2)); + int bitSize = (int) Math.ceil(Math.log(F[Constants.NUMBER_OF_SYMBOLS][j]) / Math.log(2)); if (bitSize > shift) bitSize = shift; @@ -202,4 +191,4 @@ public static void normaliseFrequenciesOrder1Shift(final int[][] F, final int sh } } } -} +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index f1df2a4972..54d19a5e17 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; +import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; @@ -30,7 +31,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // compressed bytes length final int inSize = inBuffer.getInt(); if (inSize != inBuffer.remaining() - RAW_BYTE_LENGTH) { - throw new RuntimeException("Incorrect input length."); + throw new CRAMException("Invalid input length detected in a CRAM rans 4x8 input stream."); } // uncompressed bytes length @@ -39,31 +40,30 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { initializeRANSDecoder(); switch (order) { case ZERO: - return uncompressOrder0Way4(inBuffer, outBuffer); + uncompressOrder0Way4(inBuffer, outBuffer); + return outBuffer; case ONE: - return uncompressOrder1Way4(inBuffer, outBuffer); + uncompressOrder1Way4(inBuffer, outBuffer); + return outBuffer; default: - throw new RuntimeException("Unknown rANS order: " + order); + throw new CRAMException("Unknown rANS order: " + order); } } - private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { + private void uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols readStatsOrder0(inBuffer); - - final ArithmeticDecoder D = getD()[0]; - final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; - long rans0, rans1, rans2, rans3; rans0 = inBuffer.getInt(); rans1 = inBuffer.getInt(); rans2 = inBuffer.getInt(); rans3 = inBuffer.getInt(); - final int out_sz = outBuffer.remaining(); final int out_end = (out_sz & ~3); + final ArithmeticDecoder D = getD()[0]; + final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; for (int i = 0; i < out_end; i += 4) { final byte c0 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; final byte c1 = D.reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; @@ -124,15 +124,12 @@ private ByteBuffer uncompressOrder0Way4(final ByteBuffer inBuffer, final ByteBuf } outBuffer.rewind(); - return outBuffer; } - private ByteBuffer uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { + private void uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuffer outBuffer) { // read the frequency table. using the frequency table, set the values of RANSDecodingSymbols readStatsOrder1(inBuffer); - final ArithmeticDecoder[] D = getD(); - final RANSDecodingSymbol[][] syms = getDecodingSymbols(); final int out_sz = outBuffer.remaining(); long rans0, rans1, rans2, rans7; inBuffer.order(ByteOrder.LITTLE_ENDIAN); @@ -150,6 +147,8 @@ private ByteBuffer uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuf byte l1 = 0; byte l2 = 0; byte l7 = 0; + final ArithmeticDecoder[] D = getD(); + final RANSDecodingSymbol[][] syms = getDecodingSymbols(); for (; i0 < isz4; i0++, i1++, i2++, i7++) { final byte c0 = D[0xFF & l0].reverseLookup[Utils.RANSGetCumulativeFrequency(rans0, Constants.TOTAL_FREQ_SHIFT)]; final byte c1 = D[0xFF & l1].reverseLookup[Utils.RANSGetCumulativeFrequency(rans1, Constants.TOTAL_FREQ_SHIFT)]; @@ -184,7 +183,6 @@ private ByteBuffer uncompressOrder1Way4(final ByteBuffer inBuffer, final ByteBuf rans7 = syms[0xFF & l7][0xFF & c7].advanceSymbol4x8(rans7, inBuffer, Constants.TOTAL_FREQ_SHIFT); l7 = c7; } - return outBuffer; } private void readStatsOrder0(final ByteBuffer cp) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 35b11fc44c..20331f9c69 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -1,5 +1,6 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; +import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; @@ -22,7 +23,6 @@ public class RANS4x8Encode extends RANSEncode { public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params) { - final RANSParams.ORDER order= params.getOrder(); if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -31,6 +31,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params // ORDER-1 encoding of less than 4 bytes is not permitted, so just use ORDER-0 return compressOrder0Way4(inBuffer); } + final RANSParams.ORDER order= params.getOrder(); switch (order) { case ZERO: return compressOrder0Way4(inBuffer); @@ -39,7 +40,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params return compressOrder1Way4(inBuffer); default: - throw new RuntimeException("Unknown rANS order: " + params.getOrder()); + throw new CRAMException("Unknown rANS order: " + params.getOrder()); } } @@ -65,7 +66,6 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { inBuffer.rewind(); final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; - final int cdata_size; final int in_size = inBuffer.remaining(); long rans0, rans1, rans2, rans3; final ByteBuffer ptr = cp.slice(); @@ -102,7 +102,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { ptr.putInt((int) rans1); ptr.putInt((int) rans0); ptr.flip(); - cdata_size = ptr.limit(); + final int cdata_size = ptr.limit(); // reverse the compressed bytes, so that they become in REVERSE order: Utils.reverse(ptr); inBuffer.position(inBuffer.limit()); @@ -127,23 +127,14 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final ByteBuffer cp = outBuffer.slice(); final int frequencyTableSize = writeFrequenciesOrder1(cp, F); - inBuffer.rewind(); - - final RANSEncodingSymbol[][] syms = getEncodingSymbols(); final int in_size = inBuffer.remaining(); - final int compressedBlobSize; long rans0, rans1, rans2, rans3; rans0 = Constants.RANS_4x8_LOWER_BOUND; rans1 = Constants.RANS_4x8_LOWER_BOUND; rans2 = Constants.RANS_4x8_LOWER_BOUND; rans3 = Constants.RANS_4x8_LOWER_BOUND; - /* - * Slicing is needed for buffer reversing later. - */ - final ByteBuffer ptr = cp.slice(); - final int isz4 = in_size >> 2; int i0 = isz4 - 2; int i1 = 2 * isz4 - 2; @@ -165,6 +156,10 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { // Deal with the remainder byte l3 = inBuffer.get(in_size - 1); + + // Slicing is needed for buffer reversing later + final ByteBuffer ptr = cp.slice(); + final RANSEncodingSymbol[][] syms = getEncodingSymbols(); for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { final byte c3 = inBuffer.get(i3); rans3 = syms[0xFF & c3][0xFF & l3].putSymbol4x8(rans3, ptr); @@ -199,7 +194,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { ptr.putInt((int) rans1); ptr.putInt((int) rans0); ptr.flip(); - compressedBlobSize = ptr.limit(); + final int compressedBlobSize = ptr.limit(); Utils.reverse(ptr); /* * Depletion of the in buffer cannot be confirmed because of the get(int @@ -244,7 +239,6 @@ private static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { for (int i = 0; i < T; i++) { F[0xFF & inBuffer.get()]++; } - final long tr = ((long) Constants.TOTAL_FREQ << 31) / T + (1 << 30) / T; // Normalise so T == TOTFREQ // m is the maximum frequency value @@ -258,6 +252,7 @@ private static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { } } + final long tr = ((long) Constants.TOTAL_FREQ << 31) / T + (1 << 30) / T; int fsum = 0; for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (F[j] == 0) { @@ -283,7 +278,6 @@ private static int[] calcFrequenciesOrder0(final ByteBuffer inBuffer) { } else { F[M] -= fsum - Constants.TOTAL_FREQ; } - assert (F[M] > 0); return F; } @@ -292,11 +286,10 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { final int[][] F = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS]; final int[] T = new int[Constants.NUMBER_OF_SYMBOLS]; - int c; - int last_i = 0; for (int i = 0; i < in_size; i++) { - F[last_i][c = (0xFF & in.get())]++; + int c = 0xFF & in.get(); + F[last_i][c]++; T[last_i]++; last_i = c; } @@ -343,9 +336,6 @@ private static int[][] calcFrequenciesOrder1(final ByteBuffer in) { private void buildSymsOrder0(final int[] F) { final RANSEncodingSymbol[] encodingSymbols = getEncodingSymbols()[0]; - // TODO: commented out to suppress spotBugs warning - //final int[] C = new int[Constants.NUMBER_OF_SYMBOLS]; - // T = running sum of frequencies including the current symbol // F[j] = frequency of symbol "j" // C[j] = cumulative frequency of all the symbols preceding "j" (and excluding the frequency of symbol "j") diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java index beab9fe6c7..8ea6c9e855 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Params.java @@ -7,7 +7,7 @@ public class RANS4x8Params implements RANSParams { private final ORDER order; - public RANS4x8Params(ORDER order) { + public RANS4x8Params(final ORDER order) { this.order = order; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index 21326af78e..e493dd5364 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -22,7 +22,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { return uncompress(inBuffer, 0); } - private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { + private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } @@ -32,11 +32,11 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); // if nosz flag is set, then uncompressed size is not recorded. - outSize = ransNx16Params.isNosz() ? outSize : Utils.readUint7(inBuffer); + int uncompressedSize = ransNx16Params.isNosz() ? outSize : Utils.readUint7(inBuffer); // if stripe, then decodeStripe if (ransNx16Params.isStripe()) { - return decodeStripe(inBuffer, outSize); + return decodeStripe(inBuffer, uncompressedSize); } // if pack, get pack metadata, which will be used later to decode packed data @@ -44,7 +44,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { int numSymbols = 0; int[] packMappingTable = new int[0]; if (ransNx16Params.isPack()) { - packDataLength = outSize; + packDataLength = uncompressedSize; numSymbols = inBuffer.get() & 0xFF; // if (numSymbols > 16 or numSymbols==0), raise exception @@ -53,9 +53,10 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get() & 0xFF; } - outSize = Utils.readUint7(inBuffer); + uncompressedSize = Utils.readUint7(inBuffer); } else { - throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: " + numSymbols); + throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. " + + "Number of distinct symbols: " + numSymbols); } } @@ -66,29 +67,29 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { ByteBuffer uncompressedRLEMetaData = null; if (ransNx16Params.isRLE()) { uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); - uncompressedRLEOutputLength = outSize; - outSize = Utils.readUint7(inBuffer); + uncompressedRLEOutputLength = uncompressedSize; + uncompressedSize = Utils.readUint7(inBuffer); // TODO: maybe move decodeRLEMeta in-line uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols); } - ByteBuffer outBuffer = ByteBuffer.allocate(outSize); + ByteBuffer outBuffer = ByteBuffer.allocate(uncompressedSize);; // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { - byte[] data = new byte[outSize]; - inBuffer.get(data, 0, outSize); + byte[] data = new byte[uncompressedSize]; + inBuffer.get(data, 0, uncompressedSize); outBuffer = ByteBuffer.wrap(data); } else { - if (outSize != 0) { + if (uncompressedSize != 0) { switch (ransNx16Params.getOrder()) { case ZERO: - uncompressOrder0WayN(inBuffer, outBuffer, outSize, ransNx16Params); + uncompressOrder0WayN(inBuffer, outBuffer, uncompressedSize, ransNx16Params); break; case ONE: uncompressOrder1WayN(inBuffer, outBuffer, ransNx16Params); break; default: - throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); + throw new CRAMException("Unknown rANS order: " + ransNx16Params.getOrder()); } } } @@ -105,7 +106,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) { return outBuffer; } - private ByteBuffer uncompressOrder0WayN( + private void uncompressOrder0WayN( final ByteBuffer inBuffer, final ByteBuffer outBuffer, final int outSize, @@ -116,9 +117,6 @@ private ByteBuffer uncompressOrder0WayN( readFrequencyTableOrder0(inBuffer); // uncompress using Nway rans states - //TODO: remove this temporary variable aliasing/staging - final ArithmeticDecoder D = getD()[0]; - final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; final int Nway = ransNx16Params.getNumInterleavedRANSStates(); // Nway parallel rans states. Nway = 4 or 32 @@ -138,6 +136,8 @@ private ByteBuffer uncompressOrder0WayN( // Number of elements that don't fall into the Nway streams int remSize = outSize - (interleaveSize * Nway); final int out_end = outSize - remSize; + final ArithmeticDecoder D = getD()[0]; + final RANSDecodingSymbol[] syms = getDecodingSymbols()[0]; for (int i = 0; i < out_end; i += Nway) { for (int r=0; r> 4; final boolean optionalCompressFlag = ((frequencyTableFirstByte & FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK)!=0); - ByteBuffer freqTableSource; + final ByteBuffer freqTableSource; if (optionalCompressFlag) { // spec: The order-1 frequency table itself may still be quite large, @@ -189,16 +187,15 @@ private ByteBuffer uncompressOrder1WayN( // decode the compressedFreqTable to get the uncompressedFreqTable using RANS Nx16, N=4 Order 0 uncompress freqTableSource = ByteBuffer.allocate(uncompressedLength); - ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); + final ByteBuffer compressedFrequencyTableBuffer = ByteBuffer.wrap(compressedFreqTable); compressedFrequencyTableBuffer.order(ByteOrder.LITTLE_ENDIAN); uncompressOrder0WayN(compressedFrequencyTableBuffer, freqTableSource, uncompressedLength,new RANSNx16Params(0x00)); // format flags = 0 } else { freqTableSource = inBuffer; } + final int shift = frequencyTableFirstByte >> 4; readFrequencyTableOrder1(freqTableSource, shift); - final ArithmeticDecoder[] D = getD(); - final RANSDecodingSymbol[][] syms = getDecodingSymbols(); final int outputSize = outBuffer.remaining(); inBuffer.order(ByteOrder.LITTLE_ENDIAN); @@ -207,7 +204,6 @@ private ByteBuffer uncompressOrder1WayN( final long[] rans = new long[Nway]; final int[] interleaveStreamIndex = new int[Nway]; final int[] context = new int[Nway]; - final int[] symbol = new int[Nway]; // size of interleaved stream = outputSize / Nway // For Nway = 4, division by 4 is the same as right shift by 2 bits @@ -222,6 +218,9 @@ private ByteBuffer uncompressOrder1WayN( context[r] = 0; } + final ArithmeticDecoder[] D = getD(); + final RANSDecodingSymbol[][] syms = getDecodingSymbols(); + final int[] symbol = new int[Nway]; while (interleaveStreamIndex[0] < interleaveSize) { for (int r = 0; r < Nway; r++){ symbol[r] = 0xFF & D[context[r]].reverseLookup[Utils.RANSGetCumulativeFrequency(rans[r], shift)]; @@ -242,7 +241,6 @@ private ByteBuffer uncompressOrder1WayN( rans[Nway - 1] = syms[context[Nway - 1]][symbol[Nway - 1]].advanceSymbolNx16(rans[Nway - 1], inBuffer, shift); context[Nway - 1] = symbol[Nway - 1]; } - return outBuffer; } private void readFrequencyTableOrder0( @@ -251,10 +249,7 @@ private void readFrequencyTableOrder0( // Use the Frequency table to set the values of Frequencies, Cumulative Frequency // and Reverse Lookup table - final ArithmeticDecoder decoder = getD()[0]; - final RANSDecodingSymbol[] decodingSymbols = getDecodingSymbols()[0]; final int[] alphabet = readAlphabet(cp); - int cumulativeFrequency = 0; final int[] frequencies = new int[Constants.NUMBER_OF_SYMBOLS]; // read frequencies, normalise frequencies then calculate C and R @@ -266,7 +261,10 @@ private void readFrequencyTableOrder0( } } } - Utils.normaliseFrequenciesOrder0Shift(frequencies,12); + Utils.normaliseFrequenciesOrder0Shift(frequencies, Constants.TOTAL_FREQ_SHIFT); + final ArithmeticDecoder decoder = getD()[0]; + final RANSDecodingSymbol[] decodingSymbols = getDecodingSymbols()[0]; + int cumulativeFrequency = 0; for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if(alphabet[j]>0){ @@ -339,8 +337,9 @@ private static int[] readAlphabet(final ByteBuffer cp){ symbol++; } else { symbol = cp.get() & 0xFF; - if (symbol == lastSymbol+1) + if (symbol == lastSymbol+1) { rle = cp.get() & 0xFF; + } } lastSymbol = symbol; } while (symbol != 0); @@ -351,17 +350,16 @@ private ByteBuffer decodeRLEMeta( final ByteBuffer inBuffer, final int uncompressedRLEMetaDataLength, final int[] rleSymbols) { - ByteBuffer uncompressedRLEMetaData; - final int compressedRLEMetaDataLength; + final ByteBuffer uncompressedRLEMetaData; if ((uncompressedRLEMetaDataLength & 0x01)!=0) { - byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; + final byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; inBuffer.get(uncompressedRLEMetaDataArray, 0, (uncompressedRLEMetaDataLength-1)/2); uncompressedRLEMetaData = ByteBuffer.wrap(uncompressedRLEMetaDataArray); } else { - compressedRLEMetaDataLength = Utils.readUint7(inBuffer); - byte[] compressedRLEMetaDataArray = new byte[compressedRLEMetaDataLength]; + final int compressedRLEMetaDataLength = Utils.readUint7(inBuffer); + final byte[] compressedRLEMetaDataArray = new byte[compressedRLEMetaDataLength]; inBuffer.get(compressedRLEMetaDataArray,0,compressedRLEMetaDataLength); - ByteBuffer compressedRLEMetaData = ByteBuffer.wrap(compressedRLEMetaDataArray); + final ByteBuffer compressedRLEMetaData = ByteBuffer.wrap(compressedRLEMetaDataArray); compressedRLEMetaData.order(ByteOrder.LITTLE_ENDIAN); uncompressedRLEMetaData = ByteBuffer.allocate(uncompressedRLEMetaDataLength / 2); @@ -380,16 +378,16 @@ private ByteBuffer decodeRLEMeta( } private ByteBuffer decodeRLE( - ByteBuffer inBuffer, + final ByteBuffer inBuffer, final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, final int uncompressedRLEOutputLength) { - ByteBuffer rleOutBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength); + final ByteBuffer rleOutBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength); int j = 0; for(int i = 0; j< uncompressedRLEOutputLength; i++){ - byte sym = inBuffer.get(i); + final byte sym = inBuffer.get(i); if (rleSymbols[sym & 0xFF]!=0){ - int run = Utils.readUint7(uncompressedRLEMetaData); + final int run = Utils.readUint7(uncompressedRLEMetaData); for (int r=0; r<= run; r++){ rleOutBuffer.put(j++, sym); } @@ -397,18 +395,16 @@ private ByteBuffer decodeRLE( rleOutBuffer.put(j++, sym); } } - inBuffer = rleOutBuffer; - return inBuffer; + return rleOutBuffer; } private ByteBuffer decodePack( - ByteBuffer inBuffer, + final ByteBuffer inBuffer, final int[] packMappingTable, final int numSymbols, final int uncompressedPackOutputLength) { - ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); + final ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength); int j = 0; - if (numSymbols <= 1) { for (int i=0; i < uncompressedPackOutputLength; i++){ outBufferPack.put(i, (byte) packMappingTable[0]); @@ -450,8 +446,7 @@ else if (numSymbols <= 16){ v >>=4; } } - inBuffer = outBufferPack; - return inBuffer; + return outBufferPack; } private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java index ebe880f0d3..fe0e8662ec 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Encode.java @@ -12,7 +12,6 @@ public class RANSNx16Encode extends RANSEncode { ///////////////////////////////////////////////////////////////////////////////////////////////// - // This is a naive implementation of RANS Nx16 Encoding // Stripe flag is not implemented in the write implementation ///////////////////////////////////////////////////////////////////////////////////////////////// @@ -90,27 +89,28 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN return outBuffer; } - return compressOrder0WayN(inputBuffer, new RANSNx16Params(outBuffer.get(0)), outBuffer); + compressOrder0WayN(inputBuffer, new RANSNx16Params(outBuffer.get(0)), outBuffer); + return outBuffer; } switch (ransNx16Params.getOrder()) { case ZERO: - return compressOrder0WayN(inputBuffer, ransNx16Params, outBuffer); + compressOrder0WayN(inputBuffer, ransNx16Params, outBuffer); + return outBuffer; case ONE: - return compressOrder1WayN(inputBuffer, ransNx16Params, outBuffer); + compressOrder1WayN(inputBuffer, ransNx16Params, outBuffer); + return outBuffer; default: - throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder()); + throw new CRAMException("Unknown rANS order: " + ransNx16Params.getOrder()); } } - private ByteBuffer compressOrder0WayN ( + private void compressOrder0WayN ( final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { initializeRANSEncoder(); final int inSize = inBuffer.remaining(); - final int[] F = buildFrequenciesOrder0(inBuffer); - final ByteBuffer cp = outBuffer.slice(); int bitSize = (int) Math.ceil(Math.log(inSize) / Math.log(2)); // TODO: Can bitSize be 0 and should we handle it? @@ -118,6 +118,8 @@ private ByteBuffer compressOrder0WayN ( bitSize = Constants.TOTAL_FREQ_SHIFT; } final int prefix_size = outBuffer.position(); + final int[] F = buildFrequenciesOrder0(inBuffer); + final ByteBuffer cp = outBuffer.slice(); // Normalize Frequencies such that sum of Frequencies = 1 << bitsize Utils.normaliseFrequenciesOrder0(F, bitSize); @@ -135,29 +137,24 @@ private ByteBuffer compressOrder0WayN ( // update the RANS Encoding Symbols buildSymsOrder0(F); inBuffer.rewind(); - - //TODO: tmp staging glue - final RANSEncodingSymbol[] ransEncodingSymbols = getEncodingSymbols()[0]; final int Nway = ransNx16Params.getNumInterleavedRANSStates(); - - final int compressedDataSize; - final int inputSize = inBuffer.remaining(); - final ByteBuffer ptr = cp.slice(); final long[] rans = new long[Nway]; - final byte[] symbol = new byte[Nway]; - for (int r=0; r> 2) : (inputSize >> 5); int remainingSize = inputSize - (interleaveSize * Nway); int reverseIndex = 1; + final ByteBuffer ptr = cp.slice(); + final RANSEncodingSymbol[] ransEncodingSymbols = getEncodingSymbols()[0]; // encoded in LIFO order while (remainingSize>0){ @@ -167,6 +164,7 @@ private ByteBuffer compressOrder0WayN ( remainingSize --; reverseIndex ++; } + final byte[] symbol = new byte[Nway]; for (int i = (interleaveSize * Nway); i > 0; i -= Nway) { for (int r = Nway - 1; r >= 0; r--){ @@ -180,7 +178,7 @@ private ByteBuffer compressOrder0WayN ( } ptr.position(); ptr.flip(); - compressedDataSize = ptr.limit(); + final int compressedDataSize = ptr.limit(); // since the data is encoded in reverse order, // reverse the compressed bytes, so that it is in correct order when uncompressed. @@ -188,15 +186,13 @@ private ByteBuffer compressOrder0WayN ( inBuffer.position(inBuffer.limit()); outBuffer.rewind(); // set position to 0 outBuffer.limit(prefix_size + frequencyTableSize + compressedDataSize); - return outBuffer; } - private ByteBuffer compressOrder1WayN ( + private void compressOrder1WayN ( final ByteBuffer inBuffer, final RANSNx16Params ransNx16Params, final ByteBuffer outBuffer) { initializeRANSEncoder(); - final ByteBuffer cp = outBuffer.slice(); final int[][] frequencies = buildFrequenciesOrder1(inBuffer, ransNx16Params.getNumInterleavedRANSStates()); // normalise frequencies with a variable shift calculated @@ -206,7 +202,7 @@ private ByteBuffer compressOrder1WayN ( // TODO: How is the buffer size calculated? js: 257*257*3+9 ByteBuffer frequencyTable = allocateOutputBuffer(1); - ByteBuffer compressedFrequencyTable = allocateOutputBuffer(1); + final ByteBuffer compressedFrequencyTable = allocateOutputBuffer(1); // uncompressed frequency table final int uncompressedFrequencyTableSize = writeFrequenciesOrder1(frequencyTable,frequencies); @@ -214,9 +210,10 @@ private ByteBuffer compressOrder1WayN ( frequencyTable.rewind(); // compressed frequency table using RANS Nx16 Order 0 - compressedFrequencyTable = compressOrder0WayN(frequencyTable, new RANSNx16Params(0x00), compressedFrequencyTable); + compressOrder0WayN(frequencyTable, new RANSNx16Params(0x00), compressedFrequencyTable); frequencyTable.rewind(); - int compressedFrequencyTableSize = compressedFrequencyTable.limit(); + final int compressedFrequencyTableSize = compressedFrequencyTable.limit(); + final ByteBuffer cp = outBuffer.slice(); // spec: The order-1 frequency table itself may still be quite large, // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. @@ -242,7 +239,7 @@ private ByteBuffer compressOrder1WayN ( i++; } } - int frequencyTableSize = cp.position(); + final int frequencyTableSize = cp.position(); // normalise frequencies with a constant shift Utils.normaliseFrequenciesOrder1Shift(frequencies, Constants.TOTAL_FREQ_SHIFT); @@ -251,30 +248,21 @@ private ByteBuffer compressOrder1WayN ( buildSymsOrder1(frequencies); // TODO: move into utils // uncompress for Nway = 4. then extend Nway to be variable - 4 or 32 - - //TODO: tmp staging - final RANSEncodingSymbol[][] ransEncodingSymbols = getEncodingSymbols(); final int Nway = ransNx16Params.getNumInterleavedRANSStates(); - final int inputSize = inBuffer.remaining(); final long[] rans = new long[Nway]; - for (int r=0; r> 2: inputSize >> 5; final int[] interleaveStreamIndex = new int[Nway]; final byte[] symbol = new byte[Nway]; - final byte[] context = new byte[Nway]; for (int r=0; r> 2)))) : (0xFF & inBuffer.get((n*(inputSize >> 5)))); + final int symbol = Nway == 4 ? (0xFF & inBuffer.get((n*(inputSize >> 2)))) : (0xFF & inBuffer.get((n*(inputSize >> 5)))); frequency[0][symbol]++; } frequency[Constants.NUMBER_OF_SYMBOLS][0] += Nway-1; @@ -520,7 +511,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff int lastSymbol = -1; for (int i = 0; i < inputSize; i++) { - int currentSymbol = inBuffer.get(i)&0xFF; + final int currentSymbol = inBuffer.get(i)&0xFF; runCounts[currentSymbol] += (currentSymbol==lastSymbol ? 1:-1); lastSymbol = currentSymbol; } @@ -542,7 +533,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // create rleMetaData buffer to store rle metadata. // This buffer will be compressed using compressOrder0WayN towards the end of this method // TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize - ByteBuffer rleMetaData = ByteBuffer.allocate(numRLESymbols+1+inputSize); // rleMetaData + final ByteBuffer rleMetaData = ByteBuffer.allocate(numRLESymbols+1+inputSize); // rleMetaData // write number of symbols that are run length encoded rleMetaData.put((byte) numRLESymbols); @@ -556,12 +547,12 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff } // Apply RLE - // encodedData -> input src data without repetition - ByteBuffer encodedData = ByteBuffer.allocate(inputSize); // rleInBuffer - int encodedDataIdx = 0; // rleInBufferIndex + // encodedBuffer -> input src data without repetition + final ByteBuffer encodedBuffer = ByteBuffer.allocate(inputSize); // rleInBuffer + int encodedBufferIdx = 0; // rleInBufferIndex for (int i = 0; i < inputSize; i++) { - encodedData.put(encodedDataIdx++,inBuffer.get(i)); + encodedBuffer.put(encodedBufferIdx++,inBuffer.get(i)); if (runCounts[inBuffer.get(i)&0xFF]>0) { lastSymbol = inBuffer.get(i) & 0xFF; int run = 0; @@ -574,18 +565,18 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // write the run value to metadata Utils.writeUint7(run, rleMetaData); - // go to the next element that is not equal to it's previous element + // go to the next element that is not equal to its previous element i += run; } } - encodedData.limit(encodedDataIdx); + encodedBuffer.limit(encodedBufferIdx); // limit and rewind rleMetaData.limit(rleMetaData.position()); rleMetaData.rewind(); // compress the rleMetaData Buffer - ByteBuffer compressedRleMetaData = allocateOutputBuffer(rleMetaData.remaining()); + final ByteBuffer compressedRleMetaData = allocateOutputBuffer(rleMetaData.remaining()); // TODO: Nway? Check other places as well -> How to setInterleaveSize? - can i do it by changing formatflags? // // Compress lengths with O0 and literals with O0/O1 ("order" param) @@ -595,7 +586,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // write to compressedRleMetaData to outBuffer Utils.writeUint7(rleMetaData.limit()*2, outBuffer); - Utils.writeUint7(encodedDataIdx, outBuffer); + Utils.writeUint7(encodedBufferIdx, outBuffer); Utils.writeUint7(compressedRleMetaData.limit(),outBuffer); outBuffer.put(compressedRleMetaData); @@ -605,7 +596,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff * position) method use during encoding, hence enforcing: */ inBuffer.position(inBuffer.limit()); - return encodedData; + return encodedBuffer; } private ByteBuffer encodePack( @@ -615,44 +606,44 @@ private ByteBuffer encodePack( final int[] packMappingTable, final int numSymbols){ final int inSize = inBuffer.remaining(); - ByteBuffer data; + final ByteBuffer encodedBuffer; if (numSymbols <= 1) { - data = ByteBuffer.allocate(0); + encodedBuffer = ByteBuffer.allocate(0); } else if (numSymbols <= 2) { // 1 bit per value - int dataSize = (int) Math.ceil((double) inSize/8); - data = ByteBuffer.allocate(dataSize); + final int encodedBufferSize = (int) Math.ceil((double) inSize/8); + encodedBuffer = ByteBuffer.allocate(encodedBufferSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 8 == 0) { - data.put(++j, (byte) 0); + encodedBuffer.put(++j, (byte) 0); } - data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); } } else if (numSymbols <= 4) { // 2 bits per value - int dataSize = (int) Math.ceil((double) inSize/4); - data = ByteBuffer.allocate(dataSize); + final int encodedBufferSize = (int) Math.ceil((double) inSize/4); + encodedBuffer = ByteBuffer.allocate(encodedBufferSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 4 == 0) { - data.put(++j, (byte) 0); + encodedBuffer.put(++j, (byte) 0); } - data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); } } else { // 4 bits per value - int dataSize = (int) Math.ceil((double)inSize/2); - data = ByteBuffer.allocate(dataSize); + final int encodedBufferSize = (int) Math.ceil((double)inSize/2); + encodedBuffer = ByteBuffer.allocate(encodedBufferSize); int j = -1; for (int i = 0; i < inSize; i ++) { if (i % 2 == 0) { - data.put(++j, (byte) 0); + encodedBuffer.put(++j, (byte) 0); } - data.put(j, (byte) (data.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); } } @@ -667,8 +658,8 @@ private ByteBuffer encodePack( } // write the length of data - Utils.writeUint7(data.limit(), outBuffer); - return data; // Here position = 0 since we have always accessed the data buffer using index + Utils.writeUint7(encodedBuffer.limit(), outBuffer); + return encodedBuffer; // Here position = 0 since we have always accessed the data buffer using index } } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java index ea17b23d51..93bd529f27 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Params.java @@ -20,7 +20,7 @@ public class RANSNx16Params implements RANSParams { private static final int FORMAT_FLAG_MASK = 0xFF; - public RANSNx16Params(int formatFlags) { + public RANSNx16Params(final int formatFlags) { this.formatFlags = formatFlags; } diff --git a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index bcc7b9ce96..71b7b39555 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -67,17 +67,16 @@ protected static List getInteropCompressedFilePaths(final String compresse // Given a compressed test file path, return the corresponding uncompressed file path protected static final Path getUnCompressedFilePath(final Path compressedInteropPath) { - String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); + final String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); // Example compressedInteropPath: ../dat/r4x8/q4.1 => unCompressedFilePath: ../dat/q4 return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName); } private static final String getUncompressedFileName(final String compressedFileName) { // Returns original filename from compressed file name - int lastDotIndex = compressedFileName.lastIndexOf("."); + final int lastDotIndex = compressedFileName.lastIndexOf("."); if (lastDotIndex >= 0) { - String fileName = compressedFileName.substring(0, lastDotIndex); - return fileName; + return compressedFileName.substring(0, lastDotIndex); } else { throw new CRAMException("The format of the compressed File Name is not as expected. " + "The name of the compressed file should contain a period followed by a number that" + diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index 0b1b428048..f34014b7cd 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -27,11 +27,6 @@ import java.util.List; import java.util.stream.Stream; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.filterEmbeddedNewlines; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.getInteropCompressedFilePaths; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.getParamsFormatFlags; -import static htsjdk.samtools.cram.CRAMInteropTestUtils.getUnCompressedFilePath; - /** * RANSInteropTest tests if the htsjdk RANS4x8 and RANSNx16 implementations are interoperable * with the htslib implementations. The test files for Interop tests is kept in a separate repository, @@ -52,13 +47,13 @@ public Object[][] get4x8TestCases() throws IOException { // compressed testfile path, uncompressed testfile path, // RANS encoder, RANS decoder, RANS params final List testCases = new ArrayList<>(); - for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { + for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { Object[] objects = new Object[]{ path, - getUnCompressedFilePath(path), + CRAMInteropTestUtils.getUnCompressedFilePath(path), new RANS4x8Encode(), new RANS4x8Decode(), - new RANS4x8Params(RANSParams.ORDER.fromInt(getParamsFormatFlags(path))) + new RANS4x8Params(RANSParams.ORDER.fromInt(CRAMInteropTestUtils.getParamsFormatFlags(path))) }; testCases.add(objects); } @@ -72,13 +67,13 @@ public Object[][] getNx16TestCases() throws IOException { // compressed testfile path, uncompressed testfile path, // RANS encoder, RANS decoder, RANS params final List testCases = new ArrayList<>(); - for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { + for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { Object[] objects = new Object[]{ path, - getUnCompressedFilePath(path), + CRAMInteropTestUtils.getUnCompressedFilePath(path), new RANSNx16Encode(), new RANSNx16Decode(), - new RANSNx16Params(getParamsFormatFlags(path)) + new RANSNx16Params(CRAMInteropTestUtils.getParamsFormatFlags(path)) }; testCases.add(objects); } @@ -118,7 +113,7 @@ public void testRANSRoundTrip( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); // Stripe Flag is not implemented in RANSNx16 Encoder. // The encoder throws CRAMException if Stripe Flag is used. @@ -149,7 +144,7 @@ public void testDecodeOnly( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 2dde86ef4c..0ba31a4c4b 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -106,7 +106,7 @@ public Object[][] getRansNx16Codecs() { ); final List testCases = new ArrayList<>(); for (Integer ransNx16ParamsFormatFlag : ransNx16ParamsFormatFlagList) { - Object[] objects = new Object[]{ + final Object[] objects = new Object[]{ new RANSNx16Encode(), new RANSNx16Decode(), new RANSNx16Params(ransNx16ParamsFormatFlag) @@ -169,9 +169,9 @@ public void testRoundTripTinySmallLarge( final Integer lowerLimit, final Integer upperLimit){ final ByteBuffer in = ByteBuffer.wrap(td.testArray); - for (int size = lowerLimit; size < upperLimit; size++) { + for (int rawSize = lowerLimit; rawSize < upperLimit; rawSize++) { in.position(0); - in.limit(size); + in.limit(rawSize); ransRoundTrip(ransEncode, ransDecode, params, in); } } @@ -181,13 +181,13 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final RANS4x8Encode ransEncode, final RANS4x8Decode ransDecode, final RANS4x8Params params) { - final int size = 1001; - final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,raw,ransEncode, ransDecode,params); + final int rawSize = 1001; + final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > 10); Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); Assert.assertEquals(compressed.getInt(), compressed.limit() - 1 - 4 - 4); - Assert.assertEquals(compressed.getInt(), size); + Assert.assertEquals(compressed.getInt(), rawSize); } @Test(dataProvider = "ransNx16") @@ -195,18 +195,18 @@ public void testRansNx16BuffersMeetBoundaryExpectations( final RANSNx16Encode ransEncode, final RANSNx16Decode ransDecode, final RANSNx16Params params) { - final int size = 1001; - final ByteBuffer raw = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(size,raw,ransEncode,ransDecode,params); + final int rawSize = 1001; + final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode,ransDecode,params); Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty final int FormatFlags = compressed.get(); // first byte of compressed data is the formatFlags - raw.rewind(); - int numSym = 0; + rawData.rewind(); final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; - final int inSize = raw.remaining(); + final int inSize = rawData.remaining(); for (int i = 0; i < inSize; i ++) { - F[raw.get(i) & 0xFF]++; + F[rawData.get(i) & 0xFF]++; } + int numSym = 0; for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { if (F[i]>0) { numSym++; @@ -219,7 +219,7 @@ public void testRansNx16BuffersMeetBoundaryExpectations( } // if nosz flag is not set, then the uncompressed size is recorded if (!params.isNosz()){ - Assert.assertEquals(Utils.readUint7(compressed), size); + Assert.assertEquals(Utils.readUint7(compressed), rawSize); } } @@ -228,15 +228,15 @@ public void testRans4x8Header( final RANS4x8Encode ransEncode, final RANS4x8Decode unused, final RANS4x8Params params) { - final int size = 1000; - final ByteBuffer data = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final ByteBuffer compressed = ransEncode.compress(data, params); + final int rawSize = 1000; + final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer compressed = ransEncode.compress(rawData, params); // first byte of compressed data gives the order Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); // the next 4 bytes gives the compressed size Assert.assertEquals(compressed.getInt(), compressed.limit() - 9); // the next 4 bytes gives the uncompressed size - Assert.assertEquals(compressed.getInt(), data.limit()); + Assert.assertEquals(compressed.getInt(), rawData.limit()); } @Test(dataProvider = "ransNx16") @@ -245,17 +245,16 @@ public void testRansNx16Header( final RANSNx16Decode unused, final RANSNx16Params params) { final int size = 1000; - final ByteBuffer data = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); - final ByteBuffer compressed = ransEncode.compress(data, params); - // first byte of compressed data gives the formatFlags - data.rewind(); + final ByteBuffer rawData = ByteBuffer.wrap(randomBytesFromGeometricDistribution(size, 0.01)); + final ByteBuffer compressed = ransEncode.compress(rawData, params); + rawData.rewind(); final int FormatFlags = compressed.get() & 0xFF; // first byte of compressed data is the formatFlags - int numSym = 0; final int[] F = new int[Constants.NUMBER_OF_SYMBOLS]; - final int inSize = data.remaining(); + final int inSize = rawData.remaining(); for (int i = 0; i < inSize; i ++) { - F[data.get(i) & 0xFF]++; + F[rawData.get(i) & 0xFF]++; } + int numSym = 0; for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) { if (F[i]>0) { numSym++; @@ -285,7 +284,7 @@ public void testRoundTrip( dataProvider = "RansNx16DecodeOnlyAndData", expectedExceptions = { CRAMException.class }, expectedExceptionsMessageRegExp = "RANSNx16 Encoding with Stripe Flag is not implemented.") - public void testRansNx16EncodeStripe( + public void testRansNx16RejectEncodeStripe( final RANSNx16Encode ransEncode, final RANSNx16Decode unused, final RANSNx16Params params, @@ -293,7 +292,7 @@ public void testRansNx16EncodeStripe( // When td is not Empty, Encoding with Stripe Flag should throw an Exception // as Encode Stripe is not implemented - final ByteBuffer compressed = ransEncode.compress(ByteBuffer.wrap(td.testArray), params); + ransEncode.compress(ByteBuffer.wrap(td.testArray), params); } // TODO: Add Test to DecodePack with nsym > 16 diff --git a/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar b/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar deleted file mode 100644 index acb3335ce0..0000000000 --- a/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/qvar +++ /dev/null @@ -1,100 +0,0 @@ -S9$#6#6%7654A5634A;$<65D67144>457$4A6774=5754>9%34>%#%4A588%A#76234=47%$4A954>7%34>9244966$3<7864A5755A687A678A7$74D79"43A5789/?671$$A775$A7735D878A<1#66$4>6724A652A67$3>7%4:561>679.6914A699D768$47%34987$5>8A6%%45A6777A66#5A68$5A7334>7815>47#24D57144>52%A663:85>575%>66734=5%4A572$5A67:87#44A6774D:544A85$$>478A561$4=5%54>4"6#$8A76$$4>666143<56%54;6#%4D66344;6624A45#4A9=72>9614#6A78#44:9D8965>5644A876?67335A9%81#%;46%45#$4A6744:63;6774$>7775>:64$967625A7675A874A;7#4565"24%61%46634%6$#35%6%4$7$#%47$%4756345727$690<345>5775D8625A6677"<4623%4>572$4<$%<5"514A57$;46%23;:54>6#3<4#6<5734$8:582$4>56134;6#$4>576<684474$3>$24>766$34A954A6832#;57124%%"64%>6634A7$7/:671$4A5%"%%A9277>6>46"%4<62"447$4A66144:9#7#4A#8144<614>6$$%34<563344<67"4<%#23;5664>6624$%$#"<$""4<"5%$$4>6#%">573244A8##2#"1#$#$2$$$#$#""$$"##"%%"#"#"##$###"#""%%$233662#%347614455#5$476345>98$#24A68345D4624A57#45:8678%666$>83A56$4D768;86624A66344A77144A67745D7$54>7525A567$$>59<#25A%:;:674<59$5A7667<;8777A:9%444@59337;9761%24A5#44A574C975A769;7725A7%6#334A76#A67$4A5633>%6$%4<56%4;5124>55124;5$#45"$#4>6234>6574A7%3A79"44A;527<:734A:785/$3498798759:B%4AB<2.@A??@<:@75:D?B::<6=6@?81#%#$:<:7#>:@BAB<@:=6565%95=@%5A749=%>87:9:7?:>6A6566?9>B?7=<<;;78C6695<859D7"98475<9>6=8:3<6=6;$@8<78%434244346"573682%;8796442$5$5856437$%338=6:269=8=5=7:1657/5242552868553<;;535418564373878828;=7838:=$87#68/72418263%54818$=:9:;7:4858-6$%4658283%8579=6;:97$42:58280<587/86;755<9=79#5687$5$45%576836687549=%7;768#%1534435%165#;;%7/78889%;475$952186887:=785<==:9=78365:9295:97<18378<7<9=8@7:7:96:<>A7;;58<7<$@$#.<%5<=:@7678$:57539B>><>877=6@5>9=$B<=:5:@><687?:?=<976C=>8=476:754/978A:7872."$$##$###480745?;4358;C$72A3961?:68<276768/78$4:?59794?><$6:C=;@A96>94.4665:7845894@86<5;=%34>9>6$9A=$;2516878A79;@%7$%49:79B45656A;68;887A:A;9?5>8>;:=A<:B;8549=8784<>>:A62/828%58=5@A2085443A:<89>5>888AE4A:A46?<88%29898D;>DB8:$@9@9;8:4:?898@6786=54@8;8>8D89;A7>6:;4739234575A<>@>88;@7@;8434668AA857:4:6<5@978A84:6%9?5??A8869<<<8A:;:::5%9=52596=7@>AA8@E;98<76A=4C=7=88@63A4$A9>6:8$>863D95:?898=?8@8666>$%4<8@>?983>88B=9<@D7<<7:9;8A6;%6A898:A44@E=:$@A?5A#$%9=%47>B=9D:@7@D:DE98993D%223=198;;<$8998998>96>89898A649@=9$69%;?A%6@433:<99;:;#98984B?>9>98587?885=A>8<#4>A?>?%>$%>9979%55#6%935@A;984$>678>5579<;6;:8889A=:A7?7A89"%A8685$>88;6:?99<@88>=>A=894>AAAB85?9955:9::::".:;=;89:?5;:<;64@<8#878988899A=8776:8>8;<%=<:#<8:A99<44@9>9;@89<@E;99@>=97@89<96<88=549<9@889:>?8@9983:432674A79>79:;:A<99%34=>;;?88989;<;8@A?1<:34;;$$19;AB8;@A;A859:?>26?68>6$6:9A<88?:9<;99:74@A=>:7;$8:98:;>>6C7=;@A<8;%9593232<6>=;;=<@9;C=5$37>;?98AA8>;5>D?96<986A7#371%8%95=6%4>8545$#5%=:88388:6$D9AB#?A<829:8A=95:>8$D;<>6$;8?84$9;A>?9855@?888:979%<8;9<;C:;8A8=DE;A8>:16$95%$23A834@#254%2!$!!"!"56%##3;8<3>B86:8;>>?A<%49@:%<5=63858=23166;%8>%A69DA?76;:9<989@>;<=4?958%58<$=73;?:@9??84443<;897@:=;8@;@B;57?:>=7?8:63=$<<9=D>7;:7@671%3:@A?:%$8<6#4@:?B:::<7<@8>7=#?8;8087%%%4%=5?6=46;9895=$56@4;99?D"6B46@>;A:?4606$58%?=<:A"6%8=29#438989:;;9:A4239=;<9A?999BA:=86$?$95$:98;A=%8;4@8>95AA4;B>A$23=??%7;?887;?;=9?A6@4;787@?:=85=@?D8?@;788?666#13@5;9;?7D47885549:$;87@9A9@<@:647@89@39%8:><=>:7@6>8<81;?36$53@5$<8"8"?54:>?<8888;A<9=8#4>>6295==;D89;7;8945"=5D7697;=9$549?9@:2<%<=67<532#4>773@648B9D;7$8="?973"669675;;6">74>8;%899A:89=<4<>:<<=@D96@89:A=::95?=8>8$#2;@59:6;@A2@#298C<8:B8986=#4%#7$%;@>:5=66;8:6B5%B8?9846974568:8767::8D:?$7@34%8$<>8=73#>>4=9<58;#;33%%$"##$:<4>;;89589$;5<$?=<09$#%$$599:%<94=8;8:>>;8>8C8873A86=99$;9@%7;241@98838;8$%@<$6898>665@>?97::9$##%=:7##!96#7<<58%898867@9784@9?:?=58D84;270%"3464?4:96%@888D3>:7:@899A?5$3;9%33<788>54;9AB5438<@B98=689;?A?98A58#87>:85=D>%88CA9#76:$8%3@>87756?;?>6544;=8:7=3;>968?8#=$;<=?A?83%7::$9623;7;$5%;6868887;?<"345;8<9#46$7%6:%%.4<67"$569799$: -:?:=56<:687%#49;:$499986A5=B8=:5<:;:65;9:?::=898#:9<9;<8;5978<:;98%;89<97#6=:;<9<978$567;8%9;899:997:9:986:9=89;9=998?8=:8:?:::96<:;3:#<9;"897:%9=85#%<897:93<9<99587;7<=9;9749#5#%6:=955#7:6$9$%%4:86:;?:=#:9:?@?@:9##9@:97:>::;66144<$7"34A6767995#7%%"4>677%5A66334A68145>582443>614>#7$4A66$44>5#%7#33>4658%44:%%$5"?76#4%%34=45134A5%5$4%8378$44=$6"$3;563434>%662#4>66$44A67"35A67%3A58$34>68244A66#4!%#85!$3:6%34A:334A:88$35A487"2A682A6$"44A67$4>B%%49?%5#"5A?8547AB:344AB8$.58%$%4>#"824A6#344A82$$24A56:86A6664A578%2<#7244;96#56$24=6"4#5>9244A55$4<4#65:6%%7%4#4=#7657#4;#724866#43>4$5714>6#8%"4;86632A67245>:65343A673!64A67"34<563%4A57$4 -S:8445967%$4;77%4>66#744>7%9/%4A59454>87347A#9245A6;@57%666344A79544A97787%>A6864$#>567#.$5>7974A4#$#4!=766#%7A76%40:>671%4A6713:56##$=56%#4<6612A67244A%62$4>%561#4A9$32>#66%34;676%64%676!"!4>76244A673"4A78345A67%3A567$44A46344A58% -#66=8$7%5<#74=244A<355<99545;"77244>%65?"445;79645;791$%;982%5=469344>9$3487644#=69245.<997$A66#$4<%62!#4>9$<%344;"<8#6#4>5674334<66#34>7954#>9$7"".<667345=89774791!4=69%344>$89434>69234;%7%34=$7!%<57%34;"98##4;7$"34A69%44;>77324;%7%24=9$34959%5=%973;2877#44%797%$%48=77%4=;77244>!7924%65:67145A799#%$344>779@445>56%$%<$45>8$?:@544>688"$#869244:67%34=$%6%%4<%6%3=572=571$4<662$4>59934>66344;59245;49244>88344<79345>67#%2%<66%79<879344;867244<6$724>57##4;69244:673%4<57%#4;67%24=69234>69244<661%34>67234<46345<89645;79344<66244>69$#44>67544<<7%4499?!#%%4>:9424><8243<9754$>797444<57;#34<<%5"3>69324A6$9344A:=%$%4<$4;6#6%4<#4"2#2 -S9:4$5?7#%4A89545=484"#:786344>%74444A<=445;<@47445AB>@557>B=%%?8$$#4$5AB?247A77#343A7754":59555A99%644=<"4$;67"44=67#9545:%669545A87645A4%!#4A563"35/?87244>46344<97$34A8%#4A69247A7975256344A875$4A<54%"#""!##!!%"!8!!%4;6796"68A666$4A79"45A=@?%35A6797#345A89647A8974%>A7::57$5=9""788$6$69A89647>49544>796#:A966224<%77%34"?6752A#8$#4>%7144A567245<793444;692$4>57"#44278#486345>"35866"3A67124A69%43$7#44A77644$58%:@87927A89647><79544>8661%4<66245< -$6=88$45<%72"54=957145>9%6344>558#44<7654!:=77445;9687>78$3#44>66244>#7##4=66$3%;7854%78544=8765>46#45>69647<77$4"$4=67$4!4144<77647>976444#=8"<%5;$6883$#$754":=77%#75965345%757667>77:#75>76$44":77644>76%#4=6813%$4$576%4;"!457$34=672458978344>6681%3487344;8"434$:97:.:":;76535"7%67637>6;8778>"%66347;986!#!663%496612<:%:245A77545<7"344=57534=956234;56244=67$4;91=66547=566244;76%4#6%6!74;57334;4245=77547>48547>776"93<56764:673"566%45:67145A76782;66$49578$4;$8"#4766#34<46#%4!6663%$4=68244=557!;58%44;6!7144955833;56144<$%6245;77544;:67345:66244;$7135$7%65:9;6!8#"4%56445#876645:76245!<56%$%49$6$34%6633;461$7!662#$34:56#!#49%6%5459$87544!766144:5!8##456#24;75"576344:6%344;8#9225;67"2$552%4;6##3"48%34;56734"<%67$$44;76145<%764$79%632;663247"8#76%44;67334<562"4:4$#%4;47%#$!58134;582$4="#7"%4$4$14;56#4<57234;66244;47$4;7!8%3;##6#2;56$%4867$33<46$%$!#6%$$;463$4;56%34$47#3:56$$4<6633:7677<56444<56244<7":44%<#:#8445<%96%4486713!<56224>6633<%57%9%%#4475%814476645<77542<66445;8$6%%4"57$4496"24;55134:93%5<671%4<56$34;%7#4:%7124<46%4;!6$$4<56647<7%6#34;56145;$6244;67435776435;65$34":5734:"7675;67324;"66%$4;5%34;565$7472"24$71"4"8%45651$4466$#4!$##35%32"461%"39%423%6%%!#6#"4457#34%7%%4%7%!466$$2457134662#"456%2%36%%4%62%4;9"22:7"6145-<55224;$5744;56145>68335<57344;561$4;57"24>67345;767$%%:$%644;!$67445:756$634:77645;:435;76547;76245;67%%946$%4>56134:57%349573"4;46$4;47234;5#3%:6"97!9<%6961!%56664<76444:6723<56344;#5765<5$56344<57%$4;46%$3$633#583%42:66435;"86145;67#4;"855$!45;47$44<5734;56243#"75572$44<"5764#$;4344:8:9"$495543;663!5<661%4<8%725$558%<;6$344:57"%$47555456124;56#%3:47#%;575;57663;57$24;563"4;4%7##4;47#4:$%7%34<56$34847143;5734;5"#%4;8#$235:87$%44;77855444;4"514%A4%23;563$4%!96"224<7#"$":66%44:56%44<6714<9$2%4<%$$4$7"#2;4745<76335:947324765%;56648"676!34<%6#34;57134;57%3#%!765%35966324#6#224;7$7%34486665;77444<565$34;561$562#4;4$614>%66343;663$4<67344>6"7%45>67245976759:76545<665"4:6:345;99#%3:47:$74941486734$;%64447985544976$5777!545-48444666333377643"%13/76235267644;6754579434;5643;67125;56!$%756435:664"82476#534;66#.24476442;565%$;46$%4:47$$3;45$4256$247471$357$34;56224:3#6#4:45$#$4;81"#4;45$%4%8$33;57$84"$3#6#$%;%7%"4956%:7!9%#3"9452$$4#$7%4:46234:65224;55235<76545>:74%:<863;<;$33"6<:244;:"%$%24;#6$45<57%!$6144";456334;75#3;565"%4<66244<66%44$3:55%3$4;4##%4;5%7#$:9$5%34;457356%3!23:"$4:3#!7$$23%$%3:461%34>6644;5675799%24"4"$$4;465%3;66043:4644<58237461$33>66243;57124":85565<;9"54#!$24$"53!1$44#$49%3243:8$4!!!4%62%4557$33:$%5$#5!461$$:737124:55224;47#29%7"$:572$3;472448562449363;47%99365$9914#6"55%2446"237%34/$%4"";;;>68%4$447;6@77:;99<:;!85564;9:!9;%?;9:$:77;776:=:A8#7:=;=:6$7$749#"#;744244;7$85$98;57875:::;:A7$A="5465%785:662;44566889:948=6:9@89<8685=985%8"73$56=:?7=/794="5;5!4%65#4#94%42481953$65<6;8;$;7:58673381%4%6669=$:6889574572<5;754/%82866%%3276"745363$45:774.83%8#7<5=857=6;%82<764/962274;568947;5=:87=7:5854566567":0<4=$8$8"699=7#46757589%466<5=7$>675;7?7997$5:A8:$78=7;3687:8835=::"#>=99A79:4764=6?9;9$25;2;:><897=384447;;768$6?"7==9:A6:9:;?#6<2<9@9?7;899;%2729<=:A6455=8?8<0;76%#$5"898>!!9;:553%77=7778:575%<9"8:6<9#8=58:=:@:$6899687#98772/46989$!""%!46:98=8:?::/998;:66$65<$38?:86":5795$6:6279987:#"""5:9:4334"79A>98<=7?74972!65898;=9712%%!8;6>5;98"$"<:;?A27:3:6>553379A;9$<$A:667<:;:6;8#868:A=;=59":9:9>8A=:8:?8;79#>85347?=94%344<9=#9A8=9576778%;;:;"498#::#9><:8"68:9:4::;93:":8#$!<:9:7::9#849!7;B=;A8<5;=:#<#<978#=667B859;874;<<9A>;=56?7=#7?:A857?=7:2!2%$.73;"86;B480%9:<9$<;$<:;7;A87%67$259<:9A;>=8689??#8;9%%5>=9;:;9<9:6:967!$;43379;8==9;98:;9999A::;;98":76#6@:5967$6$857:3;>!776745898A98669#7D<786%47:=:?:6%99>;58$A96979:49%347;77=?=769$96%#<=;66:%8748A:"#96;84:8;7799%:%78%##7:9469897B24"6@9:886::?#99A7#"$37:<9%;9<;95#99?#A978#9:8%8<:8#33%>88!=48#4:#9<75988888975;7#37::><%978%%;!A79A8$54<%"3-4:99%=75:6944:8>;2%9669899679:8"8%87878>5:>9944>=88A:9;969966:96?68"9:67$889943"398$78368834:579$;9788<9886749:8$65;8364568!;6"5:7"9<899A963;<:8;;99=<88;7:7:7999659979#:;:!87!"$%%224=?56#78797>867:%957;15-6$!#!";$$78=:5977#%97;888:!5!856>6;>:>;98;9=89$<:A=65$59A""87:A9:9884;68958:A=9A7798;"6"=6=:%=669$A9"">;$:9:A:8?989;:9987>B98;9?77;77238;A87:888;84:<98:7$$37:$:A;7967;9<;93:A8234"<89;9><9"7";:9<76$8;46%:235$$726=8=;:7688875$%7859;9%666?79:68>99;?="6:=867:%87$89=::9":74995#!$>:#9?;9"79$";8A8>;:=:>9;55:"=>68<>7A8:#;968>644#85!$3;:66;84#<:1;;9=<;65477:74$4%3%=9D976#5?8:48@<>8899:<94777==?73=$#99:9?=888>4":A87=::=9$::<;#$?:87=6%=853;;=6%<98$4$"8?75047==9<;84%!<24;:";;<99498"!<<>8959=76:96<33"87<:9#9$;5<47%9#=89;?<:%#:54867=5988;9<$?8;668;8%55?:987$:A76=%68<;:<6857-:%=6$99%$8:#:5A9A8>;#3#334:96@%37;?69$57:>#5!$%334779;;A9?5>;8A!7>83$;?8=;A77"::677;;88>9::76;3437;B7867>=;?:=;9>756:66?<@:7:::85?9$9!;A;448<=97;62437;:797=:99:59:85497;:9%886;A99:879<88?%?#2%8987:A63%7"9887;2344<7;8E76<879!7@A;78A8986"838#:4656753>83<9?9::A=#8636;<877<<8=9;4@89#98763398895575?;888:67=;==86699:;:A7:;88=:88$43;$:=57:4332=?";<"?<=87:?7877;89353776?975?%4<;;A:<9"%8577;>;>7!"7B856;89>99!679";9:4;9:?8:97763<<:!:>$%4:434;B!8969867;888#=737;9$>8=:>99?76978"<79477;@;55976447$3698>=;<84;:%9977;:47@A#666626:93673:9<#:8333868886?82;:93;9<=858687786%69;%59<#A@7==%$7%?8>=$5:74:A6866;?9A:7:4=547859;#34><642:>9$9;:A;%8968432:><49=;A8;$333:;3;A<:9;:=94?;977:88A8%9<:A637:<97@9A=%>:<9A8:96?:7$:3:74;A44:9887478#6;5"=6=64<779#=86533689?4089::77;==:8::278988998A733:?:%86=73;::6?9$867;;:977:>:9748=95433$36:>=!674;9:A=:#460#!>98"$4:887;97A87:A=8%<99::8#564:/;83968%;=9":97329>;:9;<=998467$3;<24;79959;9A45537;?364:?%;9>;7"#332789:98:8$8>837=84$379?899:6?8=8989?<=737397$;96=:#7658.7;667%34986?!765;:;9:7#33=B<<378$8A6?:85:996368?3?:?9:57:<<:9:;36<87:8=:857;886;;86;%=85A876/;=:8#3:?9!:7:=98#96%773;;:>769:=8=674678#<%54;>%34:"6:8;>566?;8:?8:8#8:"=;8:34:<=;A=;:::A724;698%9#8556744!#3:7:7$6!985=$:>637>>$99#8"988:1##33:9!6;5:=96$888787%894269:"8834;87382!3;A8:954%47:8A858%39"29033:8:;76;?949=455:54==9"437;A7;73<#6767922789:=66:8$%4:8>:6:8834:68:;#337:==:66966485?7"%9<;8$;<5333757$%48:$689>;9>"8456%6676:8$;?3999<8<8;$6778:87899!?:<:23!67.:85:A8=8#53%269::A76:::8:7386:635856<8:6#6@8?67;=781<323485=6:194;88=;:97;>;7$;?$99;:598"4:$98#4#788865$8=#%699:;A9788"8:7A;<5768"8;=7499;96:6>9<78:;=89%79:<>966:76$8";868;:8"6555":8;?8<4=8"9??8=#89;9977=#336"%%%#3:<%99:4;6:94473#%3;?=#9<:A6<852:<:45:27:==:%;8=<46=9867<9;>;899736675@=:>;667439;933#3:337765876?:!4$5?5#%4:A6;=:6=;8>:%3:>8#;A9899%8>@8=8;7?=86:66#3775:9:;<88":;:955/6%84$#;9A%;%9>96<32#33!449:98"779896=:7;895498:76;;:<7799:6875:89:A>899<6=-5=:?:;6:76548;;A9;9A4=;:>:98879:8;966534;>98963334=?53$368;;?9#%#7?:#6<;#7<:>37$66<7<6;8;895!4;78:=;:9>=564"38877657249:57398;9999;#5?:<97:6=7998<;8867":7:7>59#6$34$"79@9%2$4:12$376>A699;:4344:?433:8496?8=;7<8>996334=4:86!:;:<9>8#6":;;?:9?795338C454465;926187;978B99$4994<;811">968;9:9683:83:42856577574<;67:7749=8887<979;9A;4398"99$9>985"8::36795%5"9>5$47:?:=<<99>6?959!9:8<57:<88;97=;97!/978::56"548:"!!79598%55:7#898:=766677988288%634=879:9!8679;9>="6<5435<76!:887;8;7587:82:?:=8;:A=397=:8"6=:?7;%=:8=:988"747867<:7627!68:963<7"8$;8:94468!6::=%=9977;7A<:993927<<#36<98A63:8B:168#8976>25;"7=285?75$7"98773:A7;:6:6"A9?;:<98#9?=889?849>;57"><:789;67;85"4348884;?<45?58398;:5469#5%66896$84=:>#@%664:9":;:9"7=6%34:=95<;6886:?;%789<<443:;#67795<<9<897768?"9@8=$5;9$;:47678<9868:866>;85"46#>::6;;<953;;97"858699<8>8$423:"87:5456:<987;12%388<6>%=:A98:7?99!#7788@6497:>63$238837988=8=8;8<%8:%=:9:=6=:?7;8776!?%76:5348;7#6475:1473:9:"7436;;889?7:3240958!35:62677865:8447475<4;76=:8<8765>7=9%A%94757=8?74<:88986<26:75<7A:":84765<7$#68A2:;969?;68?989149"%22:776#968?;?6468=8<:9$76:=8327:8#;875997:8>9>8%%3#48524:>9#9:9:@8%9:;A=7565<%:>928;<8896896598<898<6#888?876=6659#;=85;A9556#587!<96==/;9::4997379887:922"97:9"$7;#<97:;:89658$7%8?$$59$8!;036:?6:45:7%3:897789<7=38:523:8:A%6=7<7?9A6795:7%7?7?;:::98:7889%6553!7897887787%>76:95?889860565;:569@6%578C6659<68=94"687A9>$6$7;;C883<4:97699555$3889?85"6113:57859:89?:;4243#4;%99A:8#997:$:;"658:9975<9A7<8$79;>:;;"867:68$$6;99#8>9@$8%8898:887;6#23:?7533#<9=%3=<873@:7798@4<9:76$966$531!80589874<:8:96<87!6;8@45;@:84=1:856599578966:A442$46$24%99818=6::@9:3!8;87%"99887=78#8675!166?:7;986=65>9749997:579985"%7<49956967;#52346?:8$3:@7<7899;:9##6<67""7"8;::A7743;;586A7878?8=$;9464:9::;75@$"7#7::8773%6:=<:5785=5333659%5584#9#<:;;><66?466<<952388<@<$66#%$33663;46:8<=9%35$3236%$23;53=@>::946%$<:@:7989:;56$6A>:6799A;6"8%66655<:9=#4469;><86#<:974=;<<99:8;>%$23369:976<::%:;888:;8@976::$7<:63;>73>;88888#$:=;4=6386;88982888$:9$998?4=8!46";9=7:798:-727563:@;884368$9=8648A868386<:8728:<544989::458<=88A<3!9A::7899$369>768336864309;49%"34.6951!23=<2:42:=822;5:9;;:;968;A;95;8$E9:$62;:77##23":7:67822385#4234:477:737573854!82%2%6:<2329;::7489;88;::79;<69:568@79A44?6:6#?;96>;799@:46E$><:=784:$:==9;7866%73234$67#4<<97<:=6399;8943<6"$6556435;!8%89"89;:978$487975:966?9%2:8523233"23#$%23%6<$333::862;@9;7:7B:?979=875:;93:9<:8";9;3:993#@>;@53:757=77>;=%667#76<7635:?9<9<"8638;<:"796;9=<765249<8"4"9872333939:54#512498A%>8855<6:;84;89<7:7$24<45766#<#=8:87%238!=38#369;588$5$:9A;#7=;8;8@:795:@:A:65#7%:7989;99<3>933;:78974<<96;66#77:@95B:98;9:75;8;87%4:@;8=79%423<<8:9=799;;92#7!9;<7#A7:?848955<8"1863387?27"?%::A8:78$5325%64=5:77<=3<5667469$:89#:A=98A58:!59?:97;8">96=99A:97=229;967":665;73<7799@;79:<=:%796<8?#%:97:%56>596<78=8:45>=:7:>74"%88856;@9<78:99;4%24?895433;9!8>938%875779;9:"757679;75#88<99;4197:97E$:=436<8A77;98;#<666"=18>853<<;5$37588676377:::9647599%69:;973@@5::724<79;?<;29;668@;:;698:5862:@23<9;396$:87675>:9889<96;;=<9213390%89$!$6?788676%74;>5#>986"6891<:569>:7<:@559@7768=8447;<88#385:44$99B745>99877;8:6!52988:775<=885%799897;7732<;426?=6663;599765:$:46:99E6"4:;=7=98955"886848;:85;9A<:A:3893469:#68659!!"$78;9"65<9;7752%<:=8$=3468":>66365!957;7<:?77?;!59:::;76::438:%<96>:68:7;878977::797<=496"6259?;88=%$49:G68"6498>96=>$86%98745334936369A9796938;89"!54569:44#:!743:;6=;937;9495659093>7763!%?=:A;<3?65<35>7=8>6803:7977=9$235849%655$7<;<992776#!79A6#3$9A61>5$;:@55$;%838:59:A7?=964727:<4%2399/2:63376<;8<;=::#8988$78%4$8<864:@24"777:#7295$39@7:@76:69866%33>@6@76:8=8=88<638@77;B<73%535;>:;;;<9676>:9=#96:$7A;>9A=$7B<56;:;:7#9466#8#5=99533<<699:737<@896=9#7188>%995333%<742294?748!8:8:6<9#66746;:9<9:5544458:=<8#7;987:6699A:7<987:#1,7!6;84<$470669<:799@<554>#75768:$=5:>:36@6?946=79915463539%%%133<@93333%33=?8<<8:$233;853<7385?68$24<84"6;742449$;98$7";;<::73<97:==98;=7;%3#;,:96:9;748%98775:#3#9455<;9978;>8<666<<9$=;:49855#49::"9:383=7%#69;78949>8:>8<8%775<6=#77774979%;85#:97969497886@8=:":85;"84;7%25998#338879;88?8::64>8:3748863777@8;7799<7$7878587?9AA;?48;;;77$:;:6979563;6>:58:;:8;;763"87;436"?8654<4368534568<23322565:8763%4#5$<8:6$563356!79757:75=675;76;9$13349>784@72557"87;8445!9634755:754388@6329956;498656:$:893294B99;9:2:=8669$796><8=7<9#4996><>554>8#5444?66>!19855:%9;659682;>8>46%;8562585:6<76<1425><86@68;668:79;879=9@76;45$683=$239$7!26:8::2#=85$56>8323776!9;;8:57===98:474:7B8"42<;?477466;86;<;%38:698$"7<;7:578<:75:840#;3478;2#:24:::<;39:58:<9#=89:.8=775736#;4486;93!"588:33488@9;645778:3%88894736<:?5997%59=98%5%47986@:4::576;29@88?876;64/8:8966544774>:9<9775562$13::::644$7855$:6<3"62:9<$#4488:9>7=74<@94:?%796%872645489%69:657::59426<:98#5%8<<58:@?9;476569?898<><"46789<6@97A8;5::>3<75;;894129?<:?9@435/$5466#549<758!788957<:749::;37?%:8><559;:669@8824@7789;368$656558:68768"687423!9<74><=$2;976#465=:"55:<85:457::9=:478579>A:8>964579@7415:8<;%1474%67946:98?8";75245655$:%=<9!78$%?72;8>8;99:699:%78:7%1:<=5985;8%32<<86==87;"4"6355:"7697869#964%;99#56<;<:9<:9<478$4567:655;879$77"?;:<63=$3657945"%7687<6965674569454:3;;=8$89:5;689?5574"6425687!99367878885A9545746>8@746<99684:98663<;895857574:#3:48;5985344"799<=65"8!8:6#6;974439#6A<9667;79698=9746>99A74!867>9148@6;549;9;87653::8!983<:56?8#$256:9:9542<$7B85647;3376896"54>5:96>9@;5!67>4738:78:;:!"!#%44#315<::7999246895748#;874<89:5$4><<<996"%"%"9?;767;8389@67694%8995;=9$575159?<594:4"556976:$@;1=88<5;6032864:A=9;9:>::946622035;613#9:88>?<9;32:A#<;=77!<<$?;559A56@6;83%B<7:9#98<1438:8@849899?839?;.8::07>72954?997<33>8?:?;6435?;86$11$43;9A:<977$%##%$36;49/43;8;?$:78988<>9:96:88526!7>354;@;:6!=5586:><8:=:6:45$9@9776"7475897$779=833<<6<58""178@6$315<2458"63=324:8<33#3375067668;53$;9?;$9:>6%2#<@<8>3"6"4@;881876<858=67<5>48972>8:;163:;7#$#%4,$$"!468"4=65;99B7!;2=89-7;;8:A9%979#089$1986<:%76<37:52217159969>686<;85719>;<9#:277663:$47837$7%78;-##23:<214:%%3;<296=:88>;7456:A;87A5621;5%12:?6;:8:3%7@89877#39$441=>:%6%6986557"629788567;6"%69%289:533:6488<:$33=79;@7=:#<7:69<1057;:9:5#;"59 -S98%44B77%$4A77:256444>775!85A7654!:A976245A9%6444<7972%4=572!%A47144A66#74A675##4=672%4>$6#45>66344A67$$%4>939%#4>77!8334A66#34>66334A::@%35A86%75A:#"%!#76#4""$$34!<$7!9!!$!4"$!#!6$:4#67%6"34!::%#:#4"#$%63"""8<5##3#$$!8%$7$!4;"%!#!$!!"#49245A79645A978!;/>A889067A:%7;14$>D977!75A58%5.!;$$76244A5"$4>65324A67#%>67245%66%24>67244>%6%44A7734868"55/:>6775A$723A69$45A67134A658#%44A9977A7724>7%62"5A6864A%6#456!""%!"673:8;%!!!"!4628:3%5A769$.77345D866!77A88;!%87C77920>$886547>6#7$5%7/?879%87D79645%66%4 -S98$53?6754;87254=89444>773%4>78244;67813A8974/:=8$3>571508:%6144>762%%86%$34;67244>6575A77244=#58%.64>"8664"$9>767507:77%3=5651$#>6764>56244><244>76#44>676234<66#:8#54>%7675<$67$7$>8734A8%244=57344>47%$866244>9##%4>77544<;877$4:9887$74=$5%44<:56#4%6#:67""%3=3;#%%34>76734::8;64>662$4>6%$4=$6"$;47%%;47244>67324>682$4;=%324>%%244>679#184>673$4>6"2>678%4$54A79544>77:18%A77544>77$339556753:>7954#=7##44<468247975#4%>69144;6655768#84=773$4A59750#:A55508="57084A67%770:A88544A57344>67%44#=66764A86544>8779/74>796784A6775A79"44>76344A67244= -S:865%?7$7$4%=#8##$4>87%44A896#4=89344A7"344<77444<471%4=67:$#4>77234>:"?444A89144>76$44>7734$4>8514>862344>#57$%$#$=654$:%8>6723%34>572#34$967$#4>667$%4=55$44>69144;67144:771#4=861$4=693$44A9914A89644>8744%<9"$#57344>$47344>7624>69$44>57244>662#;779$595$4>7773=87544>="%@:/84>795%4A77544A67:.85A99%34"4<67544A99644>9965A97734A692"$%>%979%$4>772%4>#86344=79344A<%:3444>66344<6612:67234<76344A69647A;:9224<759$84=79$44:765%4>#6342>67344<56814/ -S79$%4<%7$45?57%$5>56$44>$7%4>4623=#7234;5%#348$%4#$7$#346$#$%55$#%466#%4468$44$54582%$351962%406624$77$4"805$1334>59544D79334>5654$$7$%4=46134;%8##$3A69545>77548571$44;6823$34A777##$4>5%6344A4613487344=7765>671%%;48334;76%%4=67%43>46#4>65#$6%34=67234>571%=46124>67$5#4%56%34=#%6$34>67$3;%$#$34<77544>8=$57244>57144#4##%6%44A99<%45A:98$33>68#%4=47144=48$%4;471#%4;57234=67244<663$4<57234=59%34;48$34>#$6%34>47##%<%6#$4<6"3458%%348%$2561#%4%%663$4::77234<$$24<59%44=57%%:87773>9675"54>566#633<46244>57$#4 -S78$54<567454A5955975%4>%7344;967244>:1$4<6944>6774867445A79444A77234>7967D79444=462;66235A7964=%73444A677#6$4=%7245<99565$66767$6$4;59%$%4A47%$#4=773$4<57"4<571%4;7$=%6#34A77$34;67$4>69635>69%4>6814A572344:7#33=7"77244>69%4=67345A$8544A67544<7954;9234;47%24;678644>8=744>9=%4:9:#45=5:#$4;6=344;98647A8=64A<=85>:=644=78647A<8345A:=547A9>544>5##4;:=75:#89344;<8347A87%45A9662A69544859544A66644A::#34;9;6#5A9=634A89647D9;74D%#95%4>495663>67#4A69635A%6145=66%44:69345=;569D77545A76545:794345>6764489$34>672A6654>7"#%4=48#%24<59"44=67%34A57244=7934 -S8:354<79445A%74%4A#=34>895#4;6914>:"$4A79$$4<5$$$4>56%45<77%%%;47$44>66$9$56##$$%4#81<56#4=%6%34>67144=67%44>674#4<%614>57%"34<48#%4=%7"%3>:86"4>#$6$44=76#%4<5%14=673%;$55#%;4$#3$56$#4;59244>57234>66#4<673%4<$%;%8"%=$6#2<46$4=57%34<#%6$34;69%34<%6$%73%4>79234>67134=56%#4=57144>67%4;66224A46$4A79344>69544=69#44A667$448$6?7963=7673%4A69144>:##2<6724>66556><671%$4>47$2$67$$4<67134=8#2=57134>461:$6#4=47"4<57134>462#$771228%%714>59144>79244=675##4;59$4%>59%24=66324>473%4>693$4A69344A79144>56%2<%6#%4;47%%4:%7$#:461$4>59244> -S;8354B89555?5744%55>:76344=68144;:76444>79$44=97$3=564#64;77"4A<234>876$449:$99344>76##>77244A69%3$3<792434=69545A5924487#$$7$4<$#4=67$44=57$%44<665%34A;234<676#4>67%#34>%6%#4=56$$4>69234>$%6%34>46"%4<66%#$67%44>%#7#%<%$1#4=$566$4>56%4"$67##$38#7$2$$$2<%47#5##:#357#$3>$3=86#4%>9>54";6;#;$7$34>92<"#3A69344D%73@643A67659124#$5A776#4%559%44=556##$%%7#%445$6##$4587#$A57%%4>69132#%9;$44=65$8##;"358#"3;#;14#:$7#$<%7#$7$;76$%%92244>66%44>#:5#24>675$4>4763<8$56#$44A?335<7;:.2$4>56$7%$%4>6$67%4;79144> -S9::$88A67$$64>5743=5634A68?#$$4A86$444A786434A49:.74A%720A:9534D666$%4A#$#$%$966;787A58535A96544A665$4>67445A762A$8767344A675450668#67D5;/87>"6675<561$>>779347>;79644D685#64A68$$4A55344A67344>57$4A77144>66%44A67134>4$134A78334>677#$34A47234;67544=66334>8%714>469184A66:/#$4A777245D6824#A97664A47544D99;464A8778/>A677$D9984A?9"44:#<79547A$49144$=%7345>7664A$56345>67784A67345D56547>66;0>A866424867244D77644>%822A5:7$44A673334A6655;46244A562A5<59144>572>45624A66"74A55244=5664;66:05$97724#4=5634$%7335A79145<957%$4;#$8#494659344>%66$577$7%$#$4=6##$%4$91%#562$4=57#7$34>6$1%9%6$34<55%$%4$#$654>767$#4%46$4>67%5#$4;6#5"4>%7$4;6"6$95633$73%;#967$61$4$7%$%4=4$6"3476144<466#44<#%#5>#7$44<"#6#%=56$2$#6#34=6$34767134=5775#334>786#%4A69$24;:68$$4#%65#:%#654;69344=4674<56#7$4;#$65"#%4>975#2%4>6924A6933>68776#334A57144>6555A7928%5$6#%4A67%44; -S97555;%674458244$5%#5%%%68%4;57144:9573#44:876%44879556:$$%$%#4<57%4;57134;#"8#4%6$4967134;57%%$3;57$24;6754;$57$$4967%%4:58$#4<%72$4;46%$;$6"4;56%4=66334:661#34>69%34;57139$"4;66%44;6714<%#6##<7724$57$$3:"6%34=69244;66%%4:67234<47%4<46##<57#4%456#4"46%7##4=5#%;%7#$4%6"71;56$24;572%%<4$334<%6#34;6634/6581%4;57$34;;772%4;%7%=472468%44;$467%6%#3;571%4;%72"846#%%4=47#4;:1$%4;$7134;#88:#4$47$%957$44>66%;66144<67244=68134:4614<57%4;979$<%61$4<57244;56134;57124<57#2=$71458$#$395564>67144<598#54=671%4< -:$:69$:9:893<:<987;49<:955:89$"68<9$":8:%6964889$##$3<:<9:6=#8987689<995<8;98#?:=:97<9=<998:;496<9:6$%6;496:#8#%459:97%<8<:9699;9#489:9849:676<9:96#68:9$66:97589;7%58:%$#9:;966=8:95$4#;9%99:943$9:966$:;99668:"%%9:9858$:7456898:9689;:86:9974897$%9983::897699:94!9%846:8;9399<697"8948%9=97589<7755:;98$9:7<8858=<9"::796"!98699<:96:9<:%89;956589:78$6$:<:96::=998;9;7$=8<:%:9$9::34979%78::97:9=996=9<%904":9<955"89957:9:9#"98<997978"8$:9<976789;98799839%986#9=995<8:75""988#::%9966:<:96:9<:9679;:%%S -48<:9#79;8"$9$6$3689;9%%6:6#"49:578%:<98$8:"98"86%49990"7%9678%%9<85$7$49$4646588<985"9:55#59"963#8<498%8<96$<95827;5577:96%$#%48536::95%8<996%:%<9:56=:7578:8755%!88%/4$#8;8"#3#""%765#386#%%3%#:57/69:95!%/%$6$!!#$695$699591;88=#4#0:9;955"#:%%6968%.5:#"8%64:8#86#%9:86/:97874.6984$5";995$0;:7:#505:$99$#%49:65/69#%%/$9;96%59#97#5%7#%7%9:78299<98069<981<9<:9%6:767;75?7"8"69<:9869<7#8549:9856"785549:48##9<9863#:67%4%9;<2:;%9:=:966%"963#9:98%$89<:9:%9<:97#9<:83%9<8$6#9"5%%%7#"95665%%:<::85:9@6%7:<89<# -<::967:8:9%458;:95::=987?:=997?:798:6866:@:9368976#:697:96997?:=798<9<98779<:95$:788@7<=<9;?:=<9:<7=:8:<:=997<:<68589<9892:<:;97$599<9%$96:#37876.4698"::95?8<<#:78:?:9;<::?<;4=597<8":?:95<7<96#9:<<98;:6::?:=98#<:=99$:9;443:6:=:9"89$:<:6:845<8=<97?:<97#:9<:97#7:?<9;9:?:9%?:<89::A4$9:97::=<95697;<:8?9:;:6:#:;S -<8<:9:<:::6:::95%98;:96<8<986987%7<:<987:9<997779<:93<97%:6?8<:9#?::98?8=#9699;:88<:<<96=::796:8=99:<9<:78?#9<95754#=:9:?9=:95749<:94;?:=<898?:=<9:?%<7><9;9;8;=<96;8676#<8;76<9<:97=8:986:;:97<:;:;<99:9679<749:987<9<:97::;99:<:@<9:=9>="<<799$::7$S -:8:84$56;7879$6"#686$#7:9:691$<8:684%876554!##:777:9997"77677<99:83<7;7#598$66#6"986;$845#;87488;8665:9755$97:8$9:57497:7$699985<8:5%"%:94#597#465$7#%$:9:9%$$$$###99:7%5%665$5975%$98$985648:90%36#:4$%#8:74"6964$396$986<8:974:%95%68$7:<#99#%%85$98$9%5:9:468;74$$4%$6%597976<7=9654;=875;;:848689#89%7:8$%%3:947$57799$4#%;=;98%26%=<48%;;=97#68::7%#;:=957<<=99%<=?:9:#<<:9478;;559885;6974:8;$636%797678::557589947::8%:9:557##$9976676:8:97#7;9:89;:$7569:9388;974:9:5%$:9:9$7#967%:8:986<9:983:88=994<=:889%9:798:.##$64?:97<#=4":54#4$68043#$%5$244$;=;974=7;>45>:9;@=52=8=?:78="=3=?:=69$<9:;=@78$=<=>8$A48??8=9>?A<;;:7<99795<451@8=64;6#:554$<@=::<=9<>478<::94754$1>?A487>=>;#9>4%7>7>97>#74885585<57>4#3$<;=3$A9:4%7<489480<60585%$9$85>?A4$>9;487897<97<5<5@$7759#9>444:$;4#9#9>485<7>975$9/>97589>?A769>?A5:873%<854:/9:>4$5<;:9>8>465:4<75<2##4575<=A>59=%<>8#>44#7#65:87:5>974<5>?>4$7>%8A;A:?$85<;759<7985<$;$787>?=789>?>9A4#9844#47/:87>7>47797>9A387:=>4#>4;9;7?5<;;=:7<5489:944$>46;4558:5393=9921<9;$74/<8:872;87:518:;:91<87/981;8;%=#94<9%7#92;8864.;7<:944#9<<95<9<"6:8$87511:$#77:98:95#67995$598:93%9896$9::966:9:9757####74!#%#"6$#$$"%%7$6799$##$###68"79:97$99:$398:986<8;:99#<9;:9%?895=968:=9%9<#695789876:9%##4697%#%8137:=#8<:93#8<9.$777?9=989@>%9=<9:@$:=<9:?:=<9/$#9<:98:::95<9;:5#9%;<796":9.%99:8%746459:457;#9;797<8<:987:9;0%%77":<:9558#4%%.4<:<::1:<79::53#489>;;S -87:77%88:675:8;998<:#953%87:4%#$79%98$;8$7#898799$94$78%95:8:$=993=%:;9556:8;#8585::99766=9645479:6$#;6986:<8:63649<87:$#"<9;;"9%<898%77#68:::8#<9;9767:#4:=%9::98=9%9$6489957$96#65<:8%%99:776:997:%8<8:963;"$9443:9<9%7#7:6$66#9;995?8;5$:=7:<97<7:7:4%365:8:787:98:7$5787%6:9;5$75:8%$%=:9::687=:>7=S -9:=<93<9:847:769=<:?<98?:;67#3?:<78:<88#:86<:=<98#9:7:987:=:91##<:<:9;=:=:70%?:$=:9:999#:86<9<:75:98;:9767:<:9%:::983?:<:954=9%99:67<8;:8#<8<9987$9<5488:8:9496;:6:85686"79#9%<8;9956:<:96;9=966#99?993:>9=69$?9=@"98987=6"%<:<:6;:8=96%8:8=<9:?9@<9;::=:73=9<97788=99%%6%#=:94?:@::<8:>:;;S -9:;84%66:99%99<:97??<87<8<98.$<<=:9:?:;<9/4#5=;=:953?$=:8/$04%<::<79::<69":9=:93:8969;6%:<:93=?9=983:7;%98?:<7534:9764#<9<95$<%;:8669;5%8=585#:794459:<:93?:<<77<:=97%7989:88;9<6:698<:9$?:=:888$<6#%4#:99997:::9#789<9$6995#89#"#49:586::<966<9<:87897797<:69434>79344>8$67.34=7#"4>9%77#$4>767"34>76#44$6%779074>669244>79245>%9%134<79244=7!;#44>79344>7767%<5=6<34!9>76723%4>67$44!7%44=!79%5%4>67144=!%68%%#3;%5##$4=667!<8>77245;88!891$4=67144967144>7965.65>?245#=777%9%9>699%9.9>67#%44=66234><#34<69244=5$7%44$:8895344=:89:175:999344>79%24>692#"4>69$%4=46#234<47$%4<67$"#%<##6%24=82=$34>67144$8>6713$=56124>79344=792""4<68$45<4:7344.9;%9$67%24=6713440<89:.74<56765%:>66954>57%#%4;876234<46$%%$4=67244>67244>";77245>446%87144"=9#8##2%4<67$35A7954>66343>66144;876#34>67345>777/74>#:!%44>93;1$4<57%44;876143<766%3$34>7$67%34<66244>56%67234<562$34;# -S9455?67244#4##%7%54A99;%45A;99$44>68#%4=47144=48$%4;471#%4;57244=67244<663$4<57244=59%34;48$34>#$6%44>47##%<%6#$4<6"3458%%348%$3561#%4%%664$4::77234=$$34=59%44=67%%:97773>9775"64>566#633<46244>57$#44A77347=8<144>67#$3#$4>86%614A%8%3"4>66$327%7$4A676:6723%4=673344#>7765>963%43A49545=69%44>5%7244$9#%4>6664>79;834A66145<99$7<83798%7<8672334=7676#$3:46%#%#4%7134#;6$#4;5724>#81%4=58%$##2"#71<46%2=57%4#58##4>563#4=49%34$5654<47243=%6#$4>665$%4A57144;66334<46#44<:59444;<5732499%50<589=:$$;8865527%453$$42$5#79%?:D??27>??39?@:7;8B?<6895555577857;7@?<9%76429<87?%4"B??=85;<98<;=694B@8<$45###:32879??9A6:73059;:.<;>;?65;=:997%7:98%?7.%507<3@=@6<5>@@#;=:;8<7<5845517557:2:5885<==;=<;65.85755<7$44$8187%$474<755;678582244:3<78%58==8:38@=7;<"44:/545:55415087#;6782==9$7723<7833280648<08>9985$$=98;;685:484376%456818;#4%7-88967$897:583541:7#43482<@=7<2$463;;<;%87<.$43<78885540$4:8465<4;;<968285855838796<7>:;580>=;<;7;5:#775855<38167<@=484#>=7?=#;=8$42:48752487588%>@@8<9@;@?=::@:=AB789?#7;7779957@:<@7>7?48<@;@@7>5:9;@99?:@?E=<=8:.846=98<@;8<#8>5?<7853>=95:#97:=;96<$>55%9BB@@9>7<@=;@<6:=>?8D?<>>9>7A77@88699BB?@6649=?8>5>=@8547>=@;96:1<=;985D?8577>25#%6964646#%%6=<;9>=8<#:;69?=>:6"$@;:;72@249987784"54%;#$$45#4-#####$#"$%%53877;<=56"?<3>;7769A#7A99?<;:26:9947>99<5<:=?=6959:;:<:;9$><:@?769:#9;569987/69;<<"B<7@@54<8<%9$=@=735:5$4@9D:9B@:@144#8:8?;:5D=:B855956#9$872#45<>@D?9>9?3A<87774:9;AB=@;98:>;A:>9>@;@=>: -9<:%5:8:95#$"78::97"::<8#788::5#:>;97:98699<48%5687<6965899$9":97%#"=98689:#94$$#"$458"854:9;878775!9%$#7"::995"98#95$#!6!%985$9<975<%$9<:76:8%964:=98587669959=757579::@=:9695998;9856#9696::<97679::96<9897658995$%87%5#::98679;9$9%#3%:$9:6588%$:9<:96=9=:47:8::9:688::73#::<:978:<%77$:<:55=997<;<9$=;=9758#9587%B;#939969777:<47"#=877=??97;7$9=:9768<:96:$#"7669<8779;4$879"::8;:98#:<97<98"9#:::87$99:96989:57<9<:96:99::96:88:966:8997":9:#4649:9%:669"974%::9$779:74:9;975";9<::%S -8=97368::9597<98%;;%:955<9;$7#"4"69;83:;:945;?:99:??977.!%:62%7::992$<9::6@#;9:986!96#"#$=:6:8%9579965#$%99;9798:7##867$$::94"7!84469<98769;4""5%;9%"!8::97%:88#5657:98379:996:8:9398998$;::757!7;944=9<:75=@;97::<:;S -$:84358:9399;9878:7%%7:95699684#:9;96%99:976<%;796:6997<:<:9698%;:976:<:79:;987:9;96::;:39987$#:9;:88:9:?:9769:9%79<:965:=:96:8=:97?9;:75:9;984:9;985<9<:9$98::8$8:998<9<:88<7<9588<5%:658<:<99!78645579876$:!8<977798$%9998$%8:9:76=S -9;946%9<<:79=<6:8:=:799:=9#8?:9:78;:66;9<98$668<:9:8=98%<9=:96?8<999=#9:997?8<93<:<256$<":;94"::::957#99436::99499$53897978<:7998=#8<:97?:=<47<:=:9799<666%989$=:8<98#68:975:=997:9;:96=9:99698;897658<::=8<7%6=:948;997?#9=:=89=9;<:=<4%?8=992$:9=<9:$?#=:;":8<:9:?:<<=68=6887=:9"<:<%69:2#7:789;59898::7:"?:9=:==:=<97%#8<9699:997?9;:669;::$75?:;86%8=:97#:$9975#769#9=:96:9#679A446989%764:;99:=A:69#:4:<<98;<99959997#9"7?9988<9=9:"7$44$4;479595::=99;99<99#9:=:99:<87?464<7767965:5:>79;8;S -9:985<:;:97<=8:98376"95439##74#;::976$9:9539%6975=9:667#698560#9896699%:759964$3:9$965:979%398"#78%94#:::8$3887#69:6$#%5%#:99966:9550986677543::897%<9:5%$$#44578:94:$:94$:$6864;$96$8:;%% -S#$5<7$5?67244=46$5A;444A77%%<564A79245A%7%"4A574>%96$4"9%<<#$4=4665>5%244>57134<8#9$44">772444=77$4=6344>7$344=9#34=:#65>57%45A763%75>9%82%5=67234A73%4A672%4=66244>%7$34A6775>67234>672$4<5%%4=698=5752A57#44>9=9249<344A56144A47645D77347AB67644A95$44>776%=%6#:69:/7%A772;%$396614>8#344<7734>6$%44A4%%:$441;9244A56144A6344A66345=95$28%7#%;#"6%=524<67$4>661%4>8"$4>%%7144>5%30<<%61$=46#4<6834A5654;:779%"A661>6 -#%6>%"55A8774A564#5=$5354A471"%;57%4<%44#8A557#4;57%44=$$57>57%34A68635>475%34A47!%4A6633A66":$7$"$47%%45367A67%34A47#%=7724866#44>781%4>67#44:55:2344A4#1>#7%##974%4;91$4%%7%!%%$%$55#4;$6#4%"5#%4=75"$4>5744;7144>56$44;573$4>6""#%8$52#$#!3>%822=6$43>6#44A58#33>57#3:$8"3;98"34>57%4>5814>56$4>%7$4%<556#39#14>6724A!62%>472A66%34586%44$6"3;%66$4$:4#%4%724>55144#=#$$%=$7557133;847"4A66%%748%3;%6##4#<68144:68%4=7$73#4="8#"4;6%5514#A66544>$8#4;$144>6#>%6$#;$5$%46%34A562$4>$7$$%=5%%>46244956#4A7344A85#2$>48#34> -###>%8:3$=67$439%77##395%#"45#94$$"%;%7##:4$7""9#62%$"$#8%#$:4714=57%43;485$:%7#367%613;#%71%$4"86"$3<$52#43;857#34%%%%6388:5%44#5#%44#:56$%$86$:$#3:87"#3<7%%%"#%$$%%4;%618#7%##;#5##%8$7$#%48%343:##"$"612;556%1$;#6##34;65%$#287:444="66#34>43%%:%#$3#;7 -#$6<57735A667$44A7954A77344<77%44A479145D77:135>#978644A799%34A67$#%$$"#%##$"4>763$$4>47744>6A9#444A95$4=%#6$6144>99/3%$34$@69647A79245A?1444A%75#$4>56$3A79:96##%4>66$44:67344<79557A89334A66%4#8#77856$4=56$44>66%#4=58135A77##%23;6%7##$;47$4A67144A%#%5:054>657"45A465%34A7#860%4>$771$4A57344>56 -$66B897"$4=69458D87645D798.64A7"72%5>=77143<$##74#%4A#3==?245:591455:6%$4>7#7#6$$%5##3<661$%%:966#$4=65761$4#.$%65#4;4%#"#;47%$34>66###4>4%62$%4=56#%85$7$34>47723%6$#%%<5#"4=57%3>66134<58#%;%#%4>$66#847##4=$6$#4;6724497%44>69324A56$4/8A:996714;756$34;97#24=47144A776%3A966144A656#$4A672$%4<6##$8"$;7$6$#$4967134A66%2%4A;792%4<671487785;3A671%4A696$74A77%434A#76244>6712%<67$4<:9#%<593734>%83343A69$45878%6$7944#:;=5;$4>7"<3%4>67#$3>$<#=978673234>%9$324>696#4;693#3A563324>691"#89$661%>46#$4;%5A6924>591$4>47#3%56$:565#%< -473<8<46%#:9<975<:<995%99986:8<996;9;64#69;997::<"98%5B=:98:9=9999>$794$=?994"79=:95%<9==:9:;=?:96;?B9#::99;:99#==@97:7%$8943:>"=:95%?999?9=<6%4#?:%=%;9<<;$>5:8<<96?9=<95?9::536%9:9"25="=:9;679:;78:64:<998""88<$$8;"6:=?:87#9>4$89:<8=%9%:"78;:98??:<"983%%;:8679<%:899%$$4:%%%$<9<#7#4:57$88:6#79;98:?:>;98" -9%6:=:%?#9;;8<:9569:<55:0$<9:2$8<8;:8$#69;:;?68>"7@6%#8=99:74:<;<@:9$=9844;?9=997?9:883<=?999;:=#:97<>"@797<:$:87?8;:857<>6<99866:=%96:;99$#79:95$497:6#668666%%446#7$%6$%667$$99;48"36??5:57#99<:966:965=9<@?9::;99%789=9;:96$6"8:9;975"99<96=969=?:=<9#:=%5477%5%8<97::$74=97#<9:95%9:966=9#:989987%4<$#:;;;$ -$%<#8$"5?56#5?66$44;8!8#34#887#44>6624>77145=:66147>:86:615=977##3$;57144A6664>7$14>%7$44A77%%4;6%4>76144;68%>581%;%"!"681$4>$57"%=9#3=46"$"<57$$4577:5844"844A:96344>774%4>6$%;67$4$57145A66"4>67544A6614288#%;56%48968$45=:47$44;762$4<573;67%34>474#4=66$33;7%4>5713>"75#$#%7#!3=45$"%;##81!9%%":58%%4<%%$:47%$%=47"3<6635<61%4A66%34;$6$%34A9%7%44=9"24=672>%813>56#4;%7#24>665=7"%:$8%4<56646;57$$4>57$$4;57%3<46# -$7B9%;%54A89:.64A977144>8%7$%4A66$344A69444A778$64A7%724!8A77344A672%4#6$234<8%8%34A;972$4A673%4A:$;$44A6"7%44A4#624%3A$8134A69244A67144;57234A662%5>673.%$>46#$$;57$!3<57%3A67%43>67244A67"4A67%44>$7$$4A67%44>9#"614>69%4#4A79:.!64A6555/:A67%!6%$"5734.$9A557$$44A#561#$4>6$!6$3%4>%#6244A46144=$765/876714!5>9$6344A<$%;##%5A6:9245A99545A89144A9"<65%%"3A678.55A772487A6754A671$4A66124>4$8$4=8$;#%4>"7$$49#8$#4<$#<46#$4=%$!4<5634A562$4>56%4>461$4>58144A57#$4A661%48661!549661#42?"73%4A5634$:>67#%$9A667 -S9D:;;298>:@7;3"49795%4A79159669245>99447A7983A::@#50775175A9734"A89623A6925A:%72#85A97754"%4>;56:%85A#79%45A99547A4<$@5<387A79:0$769345A4934%4A7954$A796%47A89647A99:$#9#:A<$?2435>$99;44634A?544>9#?79244/9$=#=$4"#4;7#75.!!8>"56%44<7964448669%%35A796%A667547A77244867$%#%5>#;58"%7>""4569447A796871>A4;96#.<=86534A69%72#A9%6$"33#=:893%"<%"?;0>2$>A7915135A79245A7%613%24;695#5A89:$#8-77<6<87%!4>657!$44<49345A9$68/#45A8987 -9:67$#54436#8%$6%89869869$69#898668%96#99;996999:7649%;:8679%996;766%7%95%89::8659<96566;99799;9688%8799<995;::9%$9;:5469%64:%$:99%:9<:9:;:<:941?:<<5:74:=:9$59<:989:=:98::::3;9$973$8;99568<:9%=9:96#:9;:66<:<:95;8<:76:9<986:7#996799937::977:9;:7%<9:97#4#:996:"9996=:;996:8;:9589<:9:;:=<9;779<:97:8:97$36$:96#;%<::5?:%<:8?:><9:%><# -S$77%45=883164A67344A67434%87$$%4=78$$035:7971A%7144=65#.:A7774;9:%444>8!9"#4>566#$44A966134A:%76$%8A$#2A9877414:A67248$7344>85"$4A5#4!5!8064A56234>56554=6%5%44>8$#/988:3967$$%4A58$44:#54A662$4>#8144A65%409A99$%.9A77%4$!%=45!%$56$44768A$7%75$4>77834;6366248#6"A6#%42<#65096=47#24=#5#"4$48":"5$"$6%3$5#46#$$4"6"$#4%"346723:$%36#4#956"%4>491$4<66$4>#83"5>49681%4>#66351462%4""7"#4=77:%44>57%865$%">672%3A662%<$62$44A489234>47";75244;%71%3>%71!4768$4 -%:879887%88%$#$9;97$46$9#6$9%96#5#6<986<6<98%=:96993#<68::%.1<:;#94$8#<7;997<875569:5=<%6;76567::5%47#98<:98<>%9957<8%966:86%$#9#954599::9%7=7$:8%<8;673<:;:%;::8<94:<8:997"9>79?6:965:98:498;#:<<7#6?:$<9:?9;9%$87;992"?9=7#978::<7#64$<9;:%96;589;@9::96<96?=#=<88<9;9#88$2849998#48:96659;:;?99:935%<49085$698:5:%5<9=7698<98697?8=<97:9<595:8;99#?9:9944?9<:997%9:977=9<:96;99:8%<$<:457#=#<957#8;:9$%9:%$#987<8;86$149:997<8;69."998%;998<$92388%8<<#<:9.$<=#;945<8;68$%$5>765445?;;%6444>79544>79444:97544#=775"4>56$34>66549$#54>45";57543A8674A:7444=565$4=;%%234>92449$75#34>?:#;444<%?77#44>676$4>78%:565#4=%#%>79544"=867665A79$44A87544A8954487773>77344;%554";6763=66644>6"$%6"3A:#75A87:5%4>47$:613$4=65$5"6=676"4>77%44>6792=4$34>6757 -S955?87$#4;77"7#%5>=8%7#4A89%45>99554;879444>79444A"$6623;8964>89154A99634>782%4=46244>48774<:<7%7344<<<:%4479==9%#31$7665/6>778:%4;;169:?:;:<344;77$4%?8$6$44<7999:A<>?=5=:977=$?4$44977#3#9$$4;778787;69734$58A$7=#343>576544%<77:=779<:66=<74<97$:73638383728683#8:5$7$:97234=%%1441:5%#%4%57$3##"3%56$34#:24#<33>695$4>76:.34;765357867$$5A<6;544A79544=67#3=$7%6734%4>67%$4>$674A7964%69344=%7%4%4:963%4>46544A87%44>673%4;66834>67%44%?77?625%#?598$4A76345=<:789643>97#%2 -$66B77#3%68$%5A775#5=59%44>56%#4>671%5=6%%515724A573$%5D77:377>8;547/=;99545A78=144A86$4A59334<6%>78$##$=49144<56#$:24=4#61%3A79%45A69%44A77545A793434;=76#$#%>786;8348$559D77245A793%4967$0445A77144>7#%#$4=5"%$4A$6244=67$4:47%3>668#7#4A79445A61$748%$;#4$<3%3;166144>67244;65$3<47%34A67344>77%7$34A56$344%49234A996344><14;%%4=4;6679%:A79344;96124A67%34>47$4<47$34<5622=5515$=58$%4>57%#%<6$57%$4>#6##34=46$##4=776$3=56%$4>66%#"94714=56#$;46%$;"6%4>673#4=48%34>67$34<30 -$7<#7#55?%#%83#5>##8$45A89$.9#9;?67$#$4$949$;444A6773#5$$4>79544>5244A%75##4<77$4334=67/9%=577%#44A79840<:6"1"$4>%7"8%%6#$$4>%76406D873:."##>:%?2335<67%4A7%59.:$8>4758/6A467#34A5667##%$4A67%%18>671"$4=6%88%7#%4><%=$4#%#4A76##;$#56##34A559:.44A46713$A6#6244=#%###;57144;5%7%$#4;46$%$:A4;6687669$#44=#%6234>767%%#4>672####$#$:662$34>5%%"4A;89234>671$44><6;540%4A56$$4A69%449A66$4<%%##"56$4#%7$%476%4.6<4#%#4>8#@24#471440?%7$$4;897%34A8 -358;9736$"68$#5$7:97488$##897%:8%6;55$88$$98$986588%9:95%59%<99678$8:5#:8889578%9767658:93<8;:599%9::94%###95%9$98%:88;766$#879::6%568992#5:6;9%:78#%$6#%;97379%%%:99#99$:87864$#99;9577%$#6:8"76<7<:8:979;967%98<9871#55877775#4%8"$6:4#%#7#%#938978;98$7#67;#97<79<9577"$$8:965%%88678368:$369;#847%#995<87%8=9749$;7657$%:9%<:;9979:;98#4;$76%;$9$#;86::789/#<$%#$898#%6%#:96697:994979#$$7:98$7678=:9#74:#<996<8;75$8$774678;:57%:9<:8%:9%$6#$%#:8#6##856":779:8#6"797#<9$%598;973=:9%8=68<97;9$378#7;955$#7$<9::87$$%:659978$#7#$:75#%987697;9<#98698#%7:7994$58#$##4#%#8%88$""7%7;:0#;%9#965:$7$879699%?89?99:669:967$6659#448"$$936/$53;:9799=95$<9997%9$96669#96569%;#98$%88#767=48#$#76%7$$:$9%4759$;97669<#$ -%?695352;";58%35>67345=66$4>67344A6644A7653>$6=%44A4775%:9872495$1A$7244A76244;57#34A79545A776%$3=6145#<14>6614<7#54$?934796244;%7%44;4734;466344>671$4A%%1#4;6655A563%4A69544A6%144A8634#A67544A67#34;575$4:%35=563#44>66%44A77%#$#=%61<48%$4=56$%4>47$2>%6244<562$4<75#$4>58%3;47244A%66144=$6144A9344A586%>47344A69545A637$6#4>563476224A57144A657%5>58#8%$::54>75##%>6615A:59;65A9677?65##24A78545>8:?244>66%4%66335A66544=67$4<#6776?97334=67$4A56$44> -6<:9;8:?994:9:<66#79;:#9=<::76369999768<97779=:76659:95<8<898?8;:%89$%##48:<:93<8:75%588<998?:=983"<9<:68$##%9<99"#5#9<:9:<:=;697#<96%58<:98#:;4##89;956:9%6%$49;4975=<8<78558<987697=797<:77::9=1#%6899<98$4:<$9869:988#:%97:;76:<:5:8:97::9673:89?:942<9=7=7:7$:<:64?76?@%%66%55$##9#9856%$98677$9=:9679<998:7#9;97368;993%#59<:87"$##=$:7%"$99675$8;9%:8:7757$$:;89:74:<97%$"%:<9957%8=99::9<0%3%##9;7985$:<997#%9;:5465#99873%%9$:7757"9;455$#8<:5$"<99248#:%:74%:;7$" -SC98555A6755588344A715#:A$644"6>6144;976#4$74>7674A:8643?772%3>7#:#55>7924=4$%#3=#7134>4634>%7562%4>6664#8>6%#3"#34>7664>45"$"4:75523>#75%1#4A57$#33>87345>77334>67$3866444A8764<6714.%$<6%%34>48#4%>57$34>6774$?675445=:%45;=954>662%3<47144>:64;4#%14>767#:#:57#44A<;7344 -SC87554?8%:052A%75A875>47%%#;8"$"4A7%74#>662;4%34>#6344"8=766"34>"$73>7"331:/=7774=568#3"=#$$34<7#1$>#%67#4A8754A9%74#:#3=#6#9%4>65$4>#$%"#65"=$##4>476@9A;56$/<%?7%4%;$6344>57$485624#=88674;9%8085A6835A6%:.3570<624=%444A6617$6A8774A95:0%74A9%9A7777/79345D76244$<4%9344A779/:088473343A79547D669:/64>#:9#%4%<672%4A5613A79:175A57%44>466234>47:087A7;;177A69#%434>67%$2:?97544>8985/:>#75389A7:645A89544=$7#4=69544>663%##9>763<334>6$%8144>9%?6647A79649:877545A691034:8:5%8#4364>67637>86:/<;4::6435A96;28>5795479795:2..>D:;854%=895%4=47"9$%50>D7:913$"3$57124;#6"3;5714<%#7$24>6724A669174>4734%7A679054A?8065=663%5:677#:467#44A79444D79344=:57;585A66%144>6%9:%84>9;374>D<65673$47D66694"%4>69%47D:%=55$:#?87445=95@341:A76245<%67224A79770>A7934$475$44A696 -9;:9$9:95%39"9877;98:3:9<97$879:":69;:856<:76?7=<>:;:8$46:$:956<9<75;89":6<:=<96<9=<89:?98!9#?9;<48;9<99:=?:%;7:7;%9=995=69::93<9"674##"489:?<:7<:<:95:::<:9859;9$9969679979:<99=<::<98;@;:99$:9:9."=79::%<897:%;7"99%14:8!$$%8:995:6;=!#99:984:=#;:98;#%848;%:4%#:87799<767# -#79$#%%496##7:;94#"";;:$993$9857368::8%$99<:;7$8$#%%889$9579::6#7$#99=96%4999$5::6"#9:95$9#647%:;95%#$977:8$556:9%95":8676$:75%%#698:498$96$6=$%=:9$%8$:#378<96%:97:8##$##%995##5:7<:0$4:<:76:#=#8=967699#95$7<4"#<7%#=97559$:55####;:87788#%9<9855:9$9#9%86#%#$##%;:86%9::%69;9767:S -S6=9948D9;85$%A7944%4=:5%%5$%58"$4;4644>67"4=47#%4<%8%4>668%$4>661%4A66547A7965%884934>76144A461$4;$742=5723857%4>##7138567$3$%>59244>898#<;969545A69544A96345A895345>96:%7$A67344A:6679#34A6734#A77545<66;/%4#?78$%4A;:72424#?68144A6#<244A469245A59#44A79344A9957$9A77145A:3;54/8A699.%44%7@144;89934486933>56%8542A66145D77344A6$725/66245;499/%4A79345A69%4499$824A668%44$?67345D67144A%9334A8:087A$9#5#%#4A669234>9:75#5A77245D:8267D78623=757$44A%40<%44A<6 -S%:5545A9;54#A;693%4A;89544A47%44A791334>79444A%71%4>:7@544A67534>79%4#:A7665%4=9%43>79144A;145D79545>779$64>6:445>7;8%4>778$44A887%44>$$$9$%;46$9344$$%$<$%6$$#48""5%$7<%65$$%84""#%2#:$#$58$#"3:59$35A#=:9445A76:534:468#$4>$%962449656%#$44=%66"4#42>472324A77%;%7445A577#42<#7391:696$45=6"975$54>6#"4:8:;587>667$4457=56:35077%"977247>4869#7397%<"<69809:465%44A69134>47344=%732#>A:;;%4A799/585D559144;49224<671;%$24>67%44=:87244>#%81$4A69%44A6724$4A791#4>48##%A795344:99$47A=:77$##%4>:#%43%9?87$<47A7;644D8:3#5A97$$%4#$;6;545<79334>89%44A4;245;77#8$5$?666#5>598724#7=:4:345<66652495693$%7A899%44#95#6#414>562-%$A66$#3>?90?8$474>9#$;:234>75144996$$24>$561$%4<=#$792444#3$#6;%533%$456%5%#445%61$%%"# -"%:4"#$8;86%8"<85398:958:8;95$#686766$9<:87558:657%595399$54$8#::$8%%6997869:76$9$69$04678%#$9$#7%6#9$4%496%6:$95%4%78:7%##$8$7%;896%9:9764:977%9857%$:;9948<99789;7%$758;:8574:#%4%569$#459::95678996%6:7#%7::99$59964":89;9#68";9:8567#<97;96$$9:9546#97;9765%7"7$74%#4%$5755%89#54$"$$"98;6799;4967968:95$#999:766%9:895:9;986997#%4##":8;:868::97599%7::69;8:98#7<:;96$#89:966799;977<9<97:$:899"88:86%9996$%89$$656:978"874:::976#:99#$ -9994?:=:97<9#::9969:9:89/$<9=:422?9=5#?99:?:$$<95;>6:9869979##496<96%;8=56475;8$%$;4<4:472@$9%>5A9666<:<%9#1;?:<<9;749<::$#9%63:9;:87=97865:$<6%5798=996<9:7$76%9":6437#6998<99?<75%:?:?<48?::9678%<5856::84$#7;$7#843:9#983:9:76$#649<95688;:;<98:985;:#;964$9<;97$:<;S -S:8454E67455?6755A47835A87$45A672$4A4#4=#7#:#44>%65;47%44>$7%4>5%%<%##4>671%4>$7#3>"75$4=561445D8674D667544A682>%8544;67245A66544A6564>%5$4A%%144;57145A573334;565:46#%44>6665A772>5%%44>6%%4A6657A77345A5714$7"#34>$634A$5234%##6%$4=471%4;56#%3<57%<%%14>66144A57334>%68$44>67344>$1>48%$4>42%A86";48144D87225;876224A6864A6#51>56%45>6653;%8%44>661:56144>46#24A66224A5533A454#4A5%7%$4A4624>$6144>67245>5$144848244A561%#4>46%#4;$6#%4;7244=56%4 -97573%$:84$$%5$7#%86555;9:;"6$:=92"8:<7:9;7:;99$#%9=:96<8;68<8;6#"$;9<687%<::%$$<<#=:9%4$$:87$<98=:911:$:=545:8<:=:76<:9?:989:=596:9=:73<:8489@?:=79:?:=998?:=:539:;:9$<::5#7=:8=996::;9%3?9<9969:8568=988"::=998<:=88%1::<:83<9<9947<:=?#9398;03597<:=:9%<9<9586;7=8;95?9<46<7%<6#7:%9855%9723$<:=998?:=59997978597::<:8%;9:7755:;7768<99:?=%<9456$:<79%69986.%;9<:97?8;846<9;796@#9;7453%::<9:78:99#6"9::$/#9:<:;!5$;%#66%"1:8;9579"96#%:8787$%9=99:8:67#759##%36<::<:775?9?369;:7578897397:566:=:9:5::<989:::67!:996$<9667!994366"=97%7$98%9::6:%9<:736:;8#86578"69::96:97:99:75<:988::<;:68$$5:9998796:9"679:96#559:$:9:79::68:;<6%9;;::S -S>%$$45:%7245$=46#$385$3:48144>77458:678#49:56244;57#%9##14>996244=562"#4$%61%%<%57%5%8#$4:451#488#6#$3#9%%$4<%6$4=%7#349$#3#%$=66%%%%7#$3:5534$:$"$2$#%4;476$64=46$%846# -S7>67%%5#=77###57144<:%744#4:57#4#>7%1%%#7%44>6980%64>66$45$#4;#8%"34=566634<764$4=56#44=67#39%7144>$79.%4>67$44><998%44>5662448:5734%47%44=46234>773#74>6755.95?952%$4<46$44>6$61$4<7%144=661334=%6724708A8664%=661#43>65#$4>57%34.%814#8<846544A"962244<57%34;$$6#24>676#4$=667144>#764#74:5$724#5#814;6954A76353>67$#34/$>#77543>581%34$6$%$6#$3457%%%#7824%=####5$34=6%44<47"24$;#66144>67::=?764%5<7954/8>6689%7%3<55%$23:767707%94#7#%3=%66$#44<848%34<9#97653758%4#8A67244>76#$3<$%7$3%=$$ -S9%55;776$5946":#34;8:=148:66%4;67435>77%44>9#4;#7244%9$56#4>67434<4633<$56548775:;864<875714;89@444A878144$$8774%4A:=65>46$134;47$34;57$2;47$%4:6575=%6#8%7#4<:$3465=1$34<#%7%764;%4;4#665%34=47:;$;%%4=B;#:6$:8946144;57#4%:#"7#44:572$44;58%4#47234;#7%34489;934:$673<:55%44>:6<245A75=344<9=7;237A::7;>;785<5617<699%0%67%45>%835"767444<56 -S<;/4#9A87445A5755588244A715#:A$644"6>6144;976#4$74>7674A:9642?772%4>7#:#55>7924=4$%#3=#7143>4634>%7562%4>6664#8>6%#4"#34>8664>45"$"4:74523>#75%1#4A57$#33>77344>77434>67$3766344A7764<6714.%$=6%%44>48#4%>57$34>6774$?775445=:%45:=954>662%3<48144>:64;4#%14>767#:#:57#44A=;74443A4879$:><375%2%$=%8"867%#%4886#614>66$%<=:$?6673>$85$4%7;6=$6#55#%$5%/%8$7#.;%#"6##.#".#%$$"$#7#4"7768A6$3$4>57##4A9;%44>$<%3%3>%8;4$#4<6"$4;995783>#614>$$56123#<:%44;$4>412%:6654>$534>4722$;55234%554>66#"4>#78$?58$4>;;"6#2$;556#>56344>56$%>6534">662#=572334>5%:.%5A7654A77:/56$;6$44#8A77234A5674A7$344>663%4A66$54A6654#:63>4773"84A8734A868216>8";"756:#:>7"$""6A7674/6668#>:$93%4A8%4A867:A869@#4>5%6#6344$876634%4$>82%756<7765"4>66344>6744>6764>:1$6%#>#7:/#4A56%8#>37633A<24>56#3#8;4;56:$52;>"7$24>46#4A%8$4>67%44#=73A67$44<$$$%A#771324;57#8#6<65;$76242>"9$83:%78#34>5$42:977:#9A66234:942:8787#:77"5A6$244<#6"$:5%:#3>65243#?#77#50::8#88755#8>5$"34>$78A4424>%64%8$5<44$#449#$;956"4$;47"35A#662">6764A55424A566345%?769%4>9#2224>77%;244A$7344>67579$54>669$%#>8774>5754<58"%9:"24>9244A6664440?6#=35%:A69444>$69=7>#77144>638124;%814"<337349$$6344>576:2$=8A9$44>:9-8>5524<56244>67%44>7344=<46144;57#$%>4#13;548$75#24>6"#4$76$2#54;9659/8A666244;$653>5"6#A56$0?6$9343A$355:%856%4#>5564>#5:64@<46$3957>#78;6"$9757%:#8<46%6775244;575#"2>$6$%4>$/:466$%2A775745?77#4;666%4A788;::86344>45654>7654<763%4>56"3394%"#%%3%$;5564>6:-6A6674A4634>5##133>"75$33.%%<#6#34>5714>5$4>$6144>5%144;55$3%<"%2;464$4>:6184>6"4"#<4757%:#6%5%4#2$%;#$"$3%;##71%2>455#34>$65%232<$$555#>5%%4"%$6#"#%6<$9""7A"#%4<51"14<##5%3564;5563%%4888614:8#9575654A%173$2>56#4A"%344A%65$53A$66#.:A57%2A56#8#4>41$"4"?46#:244>6%3$4;55#654>8%5$#.4965%##8>#2#32<%#%9<%#$5#4:4;%#6234>47$7%8>%%%44$9;513%46344>88913775744;$568/44<46564>4$44>%6$984%6%3A6673A5664<856%3>46134>6654>46232>67#9447461$2/:>6$"4@%5$4>$%$4>456#24%68#8>434;4$7:#?7;8"$32>552@:>56%;$4A57"44>4$#44%;<47#86#$3>462%#8A56234>4%12>$7%4>5%34>$$##";%66>4%#34<5$749<7$7424765714"::"$#4>76#20%;56%#8A87:#74:@664>4$%$4>$$9=>876%:A4443A8677/?:"$6"%34<68%33<6#32>6924<8$34>6"$4>6%%243?%5554A7744<562"4>:8$8;9"1>4$933#47:%%8@65342>7%:/41A%64A774>46%%#;7"$"4@7%64#>562:4%24>#6344"7=656"34>"$63>6"320:/=7674<557#2"=#$$23;6#1$>#%66#4A7643A8%64#:#2<#5#9%4>54$4>#$%"#54"<$##4>366?9A;56$.<%>6%4%:$6333>57$375624#<87674:8%8/84A5734@6%:-2560;524=%444A6617$5@8663A85:/%64@8%9@6767.:0/564@9$3%<9#="8#8848A5#4%#>7$#3#24<676344>5$;$4389#44#:>6$:%=.:A56643>464;7>$#543#98@65@477:663/=4""14@46624>8%9143>7754%<2?6661-74@365#4>5%#671#44>5%#8$54>4%%34>#7767-46%#8@55123>5$07#2<56$%4>#5644>4733>%>568.96>5754>:8734A481.44@66:95"9>36"134$546%>5$4#3;9$4$6;64$%33<#7"#:%44A7744>563124>47:#6"76#54;8#%34<4#34>4548$9;974>5%4$755%4854634469$564<:474$"8>5%5#2>67#$4"=46"%3>$7#238665%>84#23>%61%>7$3$>52"@%6#42;<52:95557%53>654>563>37""6<4%7:1%3>$%33>76#234>5664>6234>8"#34@7644@76223><5$0>$#13>5643=>45#4;"?#47$4>6#44#;65#;7669673339734<46$44>$4$:1<7>86534@87853A768;5$#234>91#$;>#%$66##4>#5$740=8763>6663;<46514"7>$62%=9%3394%3<;65#23#>465244%5#<76%34<%622:93=4>"777#.2<%67>%$$7"%=#7":38>6#6%=97?8:773>5"3#664A7653@#65@8641$9@98#73@67444@<434>571$3-9>7"44>5561$4>3953>8$<5$@$8444A85333>643%?<7563@$1%%;2$A3$3#5$;>3863>66247%768$%;>91-84;3<$78#6;7633%>5754.%3=57$34>:5@7664:65:$%#54@36335>9:833:7755-?7658->=9>76%444>66#32:%7"$0?<9"8#%;37033:424;6%%1#$8>651379"$$$5::%<=97644"<63:44/;6";5;76234:#4988#4#$%54<$6%%3<$4"134>3659"?43@75$64>943%3<465"%>55#"32?<9$<:469%4@66#;$34>5#80#@661%3;"06652>462%4:85634>5#34;6623>84-62>%7###$3>54033>6$22%%<656239<$6785660#>46$$3<##657$239746$23$#7$$%3:3613;:643:4>46449746:"#7"5465"09>8=33=55243@#8=9?64<"57$$4;85333@9@39%#9@;$;54>63"<#">8;4#533@995#4$=:=577>;75433@;#74.;>#886"6>8%66$#=2@:#7"32>4;8"54>8"#84;54>"%743<564/>44>9613#=988462$33>:=74"?97"86=0>350#"#$##%"$8;461339<$5864@%23>%7334>8:42=46%33>#8$3#588"=>777$8#8$38"5459<$"5563"@;6243>7813743;364>9$%$6##3>88$243A597854#>:534346986134$#5%"5$5#$#$"#7%6%%747$=$$5:$$3744<97$43"2395"%%76#;#99####$$$5#%$%9"8$%8#:""93##547$2;5$$"$$/5<#4:5"4%7#4"%#75"#%#""575/$6"7:$3;3%#:#4###$#4$###$93"4997"65:562%-;1-$####7$$4$"#23/$$#8#6645$##.%5%"7##484%%%6#7$#%4#$%3#5#%56"$6646$%94#8"%%$6$45484$8#%%$7666#:"%"$%%#$45%;%%561356%%#"9%#"#5"#44#54%%$##$#5%#$#"$6$"$9#4"66##%#4#84$%#%93#%552%3##47%#7$4%"69###9"56"#%654#$544$6#6#5#":#:%#"#"#$#47#7"%5#"$$$:#$25$%28"#$66#3%.#%;8"$5##65%%57% -%$799"#<99755::773278<:98:8<973:$%;673<9;986999##6%599943%97995<9;9%#9<9949=:286789=:68<7#%<9<999><:6=:75599656%;9977<6%59<796?:=:9$787945:<:9:<8<7$5#:;986?9=:964%$$99655$9#<6."8:61#$:9:5%:8;7#$56"%<94#04%7:%9%6==7$7:64$36::5575:;896:9=99798:767:956<$:$5::69986%:$;7699<94#:98:98#%965#8%$9=##5:##5%;:>;:8=S -SA77%5B667"5?56"4#47$4!74"6"4A65%4A4%244A%6#34>4$$624>4714A75534A66"3=%%5%8<47%34A"772%3A46344A$524="5144>5$4>%7##>582%4A56%A6%54A566#%"%134=%754%67235A66$43>6#334A667A5%$4%56#33>66$4>5674A6654A67544A561%A46%%4A5654A46134A7543A757534A676"4=%73"4$461%4A#76!4A5654A$$3%>6$$$=4##44<66344A475#4=6734#9A6244A#7233A523:46%$5>66%%4A47$>5$"3%$A5664A581$4>5$14>4623A571%4A6774A653%4A$6!4A56%44A68244>47"34A55244A -::;94#:96:8899:958:99887%9<94$4$99$$98;99%786#449:7$!59%997#9!8"8%9995$%9995%6999567997%7:79%897:96699;96678"956:9#9777:99598%;939<96"$6#6%6::#%%899%898#$%776#"898%!$ -S7@=#54=%6744A675#5A8738.%9>674488A57434=662%4A46134>55144=67$34;68##4;6$$<481$4=46%4>57334>8%"9A6$74A77444<8:55A67224>6%$#4=%614>67%4>$724A45;%34<68244=8244<$2>47334>%6#4;6714>471$4>66#2=47$#%>6734A67244>671%3%$6144;$724;66#4>6$44A57$44=66%9344A7%345D779=#""7$"%"34A77%#4A56344;4%#4>69:.:%0@67244A6833=665A79%4A67224A66245A69144<66244>583#4>47234=67%#4>67%24>48$24;471%<5$%4;48#%#6%%4A69435A66145;67%44>673%4< -S>676%55A79245>47444=%743#>A:;;%4A899/585D569144;49234<771<%$34>67%44=:87244>#%81$4A79%44A7724$4A691#4>48##%A795444;99$47A=:77$##%4>;#%43%9?97$=57A8;654C8;3#5A:6$$%4#$<6;545<79334>99%44A4;245;77#8$6$@666#5>699724#7=95:345<76652496693$%7A899%44#95#6#414>572.%$A76$ -%%#6%$6%"8#699675:%6#5$98:8##$:9:5544:%8:76675738:0877$::<:97:$:#4#8649;9%#2$#%;:87"#$"#":7;9.#7#648;$68$$"8:%76#;9$5%%57%64%$%8:5$:<9<:%4#5:7996:65##%7"%#76#;8#699#<9=<7$:<8$4$67%975:86"%"$%:#4$";9:8#6$68755$##7:9;4##;9:49548#6466;8878%379::=#97<9;95#":969679:5#"89:"553:9795$799"5$/"77#$98:#:0"57:$98##89"#$<0%"9=;8<<94:9?:<:%8:<7<7$88#=#>$=#4566;#%96$#;%$$457<$<9#8<7:983;876$%#7%"6=1$:6565<9:496"6:$<9/"4=8;9136"995$#79:/49;<8:%9648=:946<7#67999%7#;9;8% -8;998<8=:87=8"78965<986<:84:8:983999:95<99#;956779984$765877980#<7;55668:6983;9:898<9;:755<8;:58:99:7%=$9889#997<8$:9/26%:9259:=<76?=69;:9/1:99%;:95?9:9=<9:=:=<9:?9%8999=:98?:7<576%<9:7845?:<:$67#9$7;778;79<9$8<9%:96%679%;:97<:%9<:98?8<:98<78#9;:8$77%8#8997:<799977<9;##98%=>38::8#9889698456<8;65497:9865:#877979566<99:<9:?9<9937$7%97::9899:799983<9:99547<59;<:=<7>;9:8%7:<:9:999898<:9:76;999:$78699:98%"?9<:98=%77;:98>;# -S5#5?<87%5877354>5734485654$76344$77244>56445A8714867$44<%7 -567<8<969?9;:686:9#75#9<=<9=?9:9=?8;<9:798=9:9@:<%9899;872#9>"99>$=="<1267>"<#4%:9;.654$96"99:95$#:9"4###83#:99#63<969553:9;4688<9:996<9767#;999%8#6496=<1"<8"%%8$46%568%8;49:;#:8<:9:?9=<@@5%<7879";:9;?9=:9:?97<<9:?9<995%=:=<9:<9<:93=8<:9;9=97?9:<:93<86653:96983=8;454668:98749:99859<:96<9<99:?8<99:<9<:"77<99998=9;98869;#6=9<=S -7699:97588#9867:95579:6%$#68955$:::94374:97$45"8:6"78%$%986#$8"8:95$$%$""96988#:::7"84992:?$8;"596:58;68;47#""$64$.2<9957%#8$57$:8:977#9<995<87944#9;9>:"9<49479:;986:948%:#48859;:92#$9:973:8%97%687545#%;6546486975699%9565$9;598##9:6%87;996989%5#9<55%68:7%#7:6$657%8998699;:766:5""$7;"653867###499::6%#89#77768<992$<9<:832<9::97<9<:9:7#9;#9574<96%185:;:949:97778=?9::98:99#:6$79<;99><$ -$%$777##55<%#%5=?7#7144>89444=66$4/:67144>79444>577#%####7"4=%79244>99344;7714#%6144;96244:#66#%;6$#44>67245#7=9$4;675.<;<378$%%=?$$99244#8#676#33>97244=7761#8#$#724%=76614# -%9445?$7%45=78#44><=;6$74?79449%#$#3%7=6;5251=%714:9#65642=696#4A69135A77345A571$9#A?::8##75D89345A41=%46%79:4#1:::5%%4A7898;34D8:647>67 -%#5<9:9484::97:9677#7:99#<9#9666$:957"#89;7$":93<999%$"94$689;87#=9::46:;%9:<88;95%$<7:=$$$$79$%%%7"=29%.##99;967<866%68;#4%#:895;979459$:59#"=9$67S -#7B9955#:A8;7#4>6;6$5A975$45A97$%64A677 -""4>#<65$#<9:9#/#$:85668;$;$%$""$99:9806#":9;1"66:8<;"6%9"""87%#69984$8986#":99#"%%"6995$":9:97399<"8%55;:;977:589<495%:=:348?9;975#59#58%"3/676>=:<95<;"469488?=;=A<8:9<44588>$8=9764:886;455:59877=5:896$>4%4/:?#;9788589;1;9"777567=?;8869%868:=:9:=79=:97?9::97>%%#9;8<=?::>;68=;=S -S:::$75A88%%4A;>=654A9=B:64.9=;5967#644>66%$4A655A67$4A76"4%>46#43:%81%4977$44>= -S9555957235;58#89D:67458A56%344;67544<8867.=4786$3;$88"7A5815;4744=:76344;57544=696$9A77$44=$79;%85A57244;$5%344>6834A77%44>75%714>66#44>887"8>6634=#7234;623%<58$4>724>7%<%7#34$6%96724<46%#4=572;56344>67345=672$4::334A6914>76$44<8776$85A66324A6722%"6:4624>561#$344A8;73"7##4:$89A8664A9764$7=6#4>67647938824<79%44;69#4>57324> -S95#55?793.<2%4897844A87344>47"44>77$%;%737%4>567;644>:%9645:8?89:/44%4>89:$$74>;244>77:0<.<>7864;67%44A793<0$4A:#6%%%"4%67$417%47%$%>7;6344>697.35;%%7%%$4%6#2%4>4713%"34>%875>46$<.$64;9;$6144>$89%$<65344;67#$%4$<#=86676$4;%75544>69333;9$<2249494/>.:A9965A9;-<0$%4=81"34>69244#:690<.>A6;6244<79#34"96%7%44=69#3%4>77543;672$4:5$5%7=47531 -S76B9944335;;5.=$45>8$%#%4=46#4=47134;661%4;#7$#4;%6%34="7$%4>47"$;$#3=##7#%4=$8$%4>466$%4%8%:$<46#4>67%%;"$7#3%4%#8%34>79%44>771$4%#967#24;4$6$7% -%9%55A77345A=;9773%4>67%#9>6824#$4>67244>%76144A87:/$75A776:96#5A69444;6!.8$"A#73"$=67$$9$>6$8#4:;7?77%%<68:6>#4A68$44>85:%43!5;7$7%76%44>795675%44A57344A676A37;89?8:=49494:%?"8$44;76$44>98#;$44A:;:14$87767794$5$$55"44.6E77245A773%4<678""86"64"9866:$17%<##6547:1%764$"%6>6718%66:084A:#;1!$$%!:?79234A79;%64A67;;1%4%#6"$6##4<::%6=8>9=4#379$8:%8=9"#<;$554=65:$%#::5><6=9A#$!%4$9A63428<:89245A8#?876:8:8=67#2%A;48>3:$4>67$#34>67"4;#;#5;?4?$7%<9"%#5$$89671%3>8#"#3"#:!$"9"2%6$"=77#72%5<%<9;23A66<;54$4>"9<;<8;9565<76:<895%0%%7466=$:989#;5$;5#$244<67$75.#76245>4$=:/5<<:<6;%308"!%4>767909.76:7"9:586125#7?875%9%6<9$4#8:79"$#44=6!9<58#!"9#2<$7%$71!69;8=844"#$61418>:79:04%45"":461344A89567/%1#7895585?78:2=<:7?47244A857!33<#$144<;:6765<%"97"$7834!649;8656#$#3#97:636"%44A69648668>56#4:47452%4A96625%#%;"%683.<#46<<:$565674/877977?7A57344A:$857:8A464<5$35:97;3471####767344@97:085A<#45A8%83$$5>4A89:48:2<#9695!"%9#:<:82"=618<:9;777#<37>?:6%504%4A9#@345A76344A662#4:%6#3<%6227%=899565A%9%53%"%487344>66344A%$9-624>4;68#$%465/$$""47$2#$;"5"""!!!%$"478<%797"<##2"4:9%<#$;8;;;#$$"1;3;7$8474#8;844$5"8#!229"""#%%24:578:1>9?8#5;886562$":77"#"$24;8#76%5-?56:/#4<56%34%"9#68044>66%4#7<$"6$#4>57243#=81$$%95$4!:"4%89623#64"9!"7%!$;3;9#:8>877!6""!"!7!!"54"9#;88<%57""$"4#64::%8;%#"6""$$8<5;%86"# -S455=%7%<%7#4<67$$$#<67%=8334=7923487244>6%7%%4>5964#5>799>%79;:1:134#998>$62%5=:7=9%445>77%44>%8%44A#57734<86 -S8@98255?675#:;578$#3<98764A89647A996#4=67134<6"7144 -689<:97?:<88899<:966::9$98:998?:;686#$%$897:4#:94$$9<97$599;:65:8:5989=97664#::;997$:=996<:#:96<9:996:9<:76;9;77598;5#$55449<983<8;944$$88;:68>#9$658699:%86:98<9:<8<9467:#7%8%798B?94678=?969:?99$98B=966$;=9957%?:96%;=8$45<:<:9896$:=<9<84;=998:9%;<98<9%;99:?:969=?:=<939<=95?:::997??<9:<99?447?:=47@>:9?<9;879;98:@4::::<>@:<:9>B>:>8;S diff --git a/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x16/qvar.0 b/src/test/resources/htsjdk/samtools/cram/InteropTest/RANS/r4x16/qvar.0 deleted file mode 100644 index 483ba168e048d9c636d7933d4f7a38f5149af698..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32987 zcmV(hK={7^gXaYyA_FWf7)Mh86;XjYfkJ^*0RadJ3NcfGB7-M_2ZCjSaDpR)41x)P z9)TNZZZsKN1_1{F0R;V@=K!Hw;7T&l005G}Qc@t-KR2~cec@P6&;@u_ywMFbzpf~< ze1!vnT=fm2g7cM>oXx}ray}V-sRd~xsTqp6DOHe3M1+P)C9@K+ddhi!WMC}MnX+iIZxFan5AcF z2ACFGAfy5_Acy(ythwcny9|Q*#pq2$4~NQE&flum>l8fMrvclqa6z`@P`3W-s2yXXR1IGjGgpHl-98b zsi9`mCyya1q%^=E9=EJb`jMBXf+Ffp+NRwy+kwO{qoTA(;?CCQGt+z_7G;qi5f~CW z0F(NYq#+tLz=g@CPmV@2JRPMLf|7HwkYFiLn-LII6K}l6(eHklZ4m2lPNPy$K(XU+ z)L^1eQxIKHU9eoRQqn7t3A3aVN{Ao#g%$o-wY!4=jpa}TQ{!_*B)R3G?k(#EKBwT5 zS4yB~5VJtcG-NqpLqS|(DR(2EvA=yNK>c+W;)l*(2_)U`0`{vEniiY?mEi|(5iPYn z7Kh^P0v^J>4=JYq!DH@o*qHY6<=`JwsqZAo>7UPNS`s_g;7@#LV;NS5+XDuNcsHHZ zu3#axTa?X1uYj^%>0(B#hMow+S^4a$WmzusJUJAt z(rz>_zf$kbTjD|14LSFAaGUI(Ksr}gpzbzN$WH7v=-f;YjtyQg$tYppDvq3>))iCm zUbbeOZkfExgL(1dG&FRT(jsFWmi%ddk=`mEbN*;$EN1vLz7L(D6_KB?(_vpGY9E2x ze~mD)$FDFte7O^x8sul)z;6Y=N@Z2|rzT=14_8?&*IXV2v&crp|v2|*DulE5EX~l8t?bNL#-4K_pgV~^1CstENoG4jIZlCtNmvR zKfk~yX^c@jTDY7Kgg;UGx8#CDq@VFJ<%UXjS5sHq<()DnPKB(x80^!_5a~~{kulw&MD#l-pII0=HU|DDxjeWDc$Io+r zO7QA&fmS?C8l4~ed#?}c1%I1Q!Edn}#AO)mxV$J@<9uxYvPWG?z~};ObS@ehDgVL! z?0tpoyw8n)iA#bH`v2$(GTDJ&zgqDYj7r=2&2^sYe*fL;X~V!6=^sRGQy-f>EG*q> zcaPf5EgxZm3xC+@3~D;(&`ENhuND;8kryhl~2sx_7D2+Vg0lr z5+?A$>cf7f(?mDY2v99IQql||=-p#%Me6tt>0frF*hQLNz78}u(LIxbFn5-R(Y+d+ zo1L+){!?)ViGk-VieJ(s-!136L41>$-c@K>S+5hll#xX)Hy+q}+=h~v_Hsr-x(A8n zhA(N4p9$UGLRi<0(A$RTR*M;o;-8pKR}WQch-p(&DAgdx~FA!kjhgq8gII5w1$d9bO>mTJ>bsVMNHI~z#w{bnx?>l*2Q5t+2 zi*5toWi-h;|;N2J`uIVV_s85NfSnU9V#;vZ!OqeknYEx|ZGoOW!52lY=rUTL#f2!(1MUP|j^6*-kK@8%nL~P#Lg#4;eiDy1q&) zNB^`Y639JX&xQHd??3ZzM$KH9Ixyz9jMij*JUIP%>+g!GD?YKFkI(p-hN-%Q82CtDst5>^h!x zM!wv~f4?B7-9MbsFcDZY$s%5E?(2oGv@0>sdyRo?5#LYwh6A#04=dDbTeLN+rJ1Kw zhQZB=82bxfdmPoFzQAQ-MHpKaaDuFPiLYT_Pk*M)Ha#H&cm+$p0DOC&QVIp%UPyj> zkklakCGpE*mM$3=u{DIafT z(7)Z)z1O+{Mupx`AS4t8oB^ph^MG$e-@ z?-C6fr{gB&>ZA-NK`K*Ftdwp>&BML#mdDe>&+`oBr2e%j4)TcJ&X_z@o`>isW zlkXXHtj|TI&Q_M%1(&oE#()*m9(RaFPC$~tELDd!d^ASx1+<&Ne&TndGLJ$O=c{V}hP+FHYsKu}X1;|6m{VwUX3nmh8DOx1O zF1A%RqexfN#2>@zQn_0?%VMF`-62a?Y$NPxHrQ5l1?}QqEtfdPle9Uw(AmVnlECIr zxk6&p-!RdktI+A+Q#aF62XE){`BSAlV_|pDRCNuL-4w+kC>2Pu|3c3YwI&}&GuK9^ zc~=LZ$VMWGcVl)Xm-<`=!ADLQZqeXS;fL!-q22`Ly1#4=xa0LmPt%D7_U>K%5U2KK zhE0*O>&2h7pX1v0dbD+LKa17xI0wT(n1uYAK%wOgXl=O1%$Gu`f`kf2am79#R=CL5 zxp*A}ep+DZ#erBSg9{Y9nI}7RidYbOH1#l{Sp-*o z)<{vwB#E{&cLw04_KN_ChzJwue%lH%y6qmhRzm`$)!czF?AUG=_7}9?4^1K zo{Ma%C79o&1osZ3PG%$f3spziX60QCHH>PLRaJdHAJ@iL&ztx)k284X{wq{>8>ptm$nf?&h0#gT?mu3>17R zspSneUW8eI&p$Ng*k2G*^c2@<0`uO+on=-iwa|YR&>yU8;#UOkM<&HXnXwv@F+fM~ zsZsZb1zX`0Bp-%)kK{6pSQymRN+-TC z&g1)j&;a*vuU>Ag_$huQNp$6Y(8gHzNk#6IDzOXj^wegFd&%PJH?XP#UZJVLULlNL zHNu}}T0=|O9SLz5D;B#%N~l>$^7eiDiB{{DKl1Svv`2k-8P#2T`$Cmc2Xspc`gHzU zA>G8i-3L&|M<)_r$?fyzEeOqeQcBfEqeti`T|(#Y3DzCnz?j57DCa73rUyA=B+T5B z(bP(q9K^mTK2n?~14hBErXJMj^&b5AVbF!^RXIaQXOjqe3-DT5n}cDj5xEa0X-Ig4 zss(sm0J;u!CG=~;5Wv;f#hEfAP-Ueu5FjQ<1oM%7+tu0vW-hDfh1P`RbGdP3deO!s z*%1UF|5>Qu;)a%bF?~XFj%GIkzX33pr|0w(5J1z`u(OH~0|1Y(?s`pXO;p*^i;zV1 zF{d#_zIJsL!)An1qGev^vj`gVn1eFKgzCN%OvUafFEPTgXeC6^p}4BLMiAg>@gQ5s z4aK?EAkY}`3L^910AR{h&5kS3@c=nRT@8wN6)t<_h!naVkEF}J2&YREzOAAYZzXHt z{jtyC#j}Hz3`bJvCTDG-&k?ex%@&ZQ6DB7}ih|IIY}9_6potNHA1*e;b0^^g3i~|; z!N?Bgr<0`MRQ$#C~{)WL8Ph3tdgoA z;1@H)JLPd56PR;Nb2q<{*rWQGn#Kafb{GHUYU1REL56Da<`^||{_Rtj#fo1JTAy_Yn$b@J7)@o`nUi*R8Vfyf}Y%Uzdn)JAnR!8CGlONMo034X|i^}_O) zyhUSDdB@@P*u=mUC@f{SZRqjVPW6)}W^MLya9Fx@fkGmbCwg7!VOE~Q zfaJ_sh@cOX|LAlEjpW^=HN_XRr^6UkcWW`T`CT=qaso&|Fq%AvC zic{41AOnAQVCPLK3V;o+oMm(E?%`xZ(Dw<3vbJ4k8nG>uYSTnjh_)iQ3uj1{bSIBy zfn`)t9d|+%l$Sq&+c#Q*e8z%EmXt3WI0@xV!t5wZ@W7bzIl&v7rnq|JvWjmiXtum- zK!v!HoP{4H3OF!XX!3K@t0vhqa759@@d$|2;iIV5f(JsOt`ZPd8^9g2any3Bxw3im z0moK0r-ClAW=(7~u*JvcbD3R(aOo9R-;;ZQ_ke_20fTA4RRuWVD-j+#FhJ>Xpi)EF zlLxp!GSHw~@^wW8jYMyAGOXnV_uXm;4yT(Ma5Fruygj|WT|8J|3e}A>E<0d_a-1v0Fs;M$|#(bMsfDnMIuARAH^tu_EUT7hkb9 z&UUi3Nsu|9Rz+SHPgk)-AO&fpx+f|D+CCQnNI(f^&whp|lrFU750d-E8Zp>I zWnr;~5B7gtextHu2uhJIiTOnoPKsuk0436A(t$6>( z$Ks)VPNsFuiM(m{%qiVvL_o*EjCqpVdyeDKU>fHAfyOL3Y}q-nc^0(KUzo}_L1_3a zlxvhP2Wh^Fd#wdivppNHHrfWof|Ngfak#V2kzY#e+FiyfNM&h@>gm~7#w2SBAs$m(F9%q(%e3ux}s za36JiKFBnkN;9NTB|rxq(Y)Ipe%TSpRtW&WpuL_>3gKp*>B1=r82lt3xfh z=Rf*9#dl0hKTWN_!mYDvioB4CNS{s^%TP15anGNFx&T0{^SQ@pC9>$i&rIDO!5l$s z8BNEX6X0jAorto+ihN1xi!un(>0 z(rXO4GAWzB(L1&N?BS@nJs4W8hHY=jL@B6H>2xgTQ(K15s+ltl4nek`J*;QI^XV-r5${WWLAy8-L7UkR-%VG-XN%7o!`^jG#0$B` zj8%#r*J`k6vuRy|!O0|<{s z{(SP<9jk?n--&?QT?n2WC>&y2ej1>7U=XOH-K7K>pqPax71_si?T5SIK~NC7HH->T z-ir*73Q)h+ClAXVlIoMsH7Brzz<<&#A1WYf^V+1Y(krCtiLSjSwO6NKTw%iONdI`t z!)!sX{+eCiC|RZ7l{(_sSib$s&e82m#Wue9=Ls`Yilkaf@atP3C0ybC98E++PH90y z>rBmQiU=NjB|@B+K%dV2Q*gRoP`Oe1V{5NGKI8w?Eb`x^?=E&Qg(M$AC75iJy zz03(3?kij@Z!hgAS6daUy9qw<(T{Gv9V^gddFj z5%A8hJ8iP-S2r*PKACOX$2nU?pWTqR)f_k47^pQ9Ylw1u%U@wghx8&I=9sHVGz<6* zXg(wkWZjYX1Y#0;VH-M z!_HYZ^lG|Uflf6Xh$Hf2;^Q_m!4A%ucUcZyQ*Tu_P(SW!#F+jaky5!OtY{u754cXc ztmW)#L=Z$h!(4iNs7TaJd=f{*kvisz0z5w`J9x6)lDnr&WI0KIX-?VLmPXHub-3c$ z&UsM9hf6nvOW&6bUMRYZ2P(mOX<+M#@7)*JlMa|aSh*7SOS64DWSH~1kh2FT=BX!x zwTu1@<=M-rCn6U{Dc<|R9g^H`kmgZ@c=tj*vKW))gYEMG=~uIgLD2>LPU`YRqde}5KRP=0xOj+_6Mz^$z`;p>B{QLt}LZB<)dCYqId z%@RZWE_j2?zpPAZaGFa$c${tOHu!e`HfzuQrZ}4zT2!HfjZWJKlc=O!Pqi)UpDM6n zlL^OPcC>wq7ir#ePCr@2ZKc#aH}xqN9p>4d65O+Q7wAd@lE`+V3mfE9{|H z)ST67D&4S1{n_)(FM9WUZ#T4(an6_)6`F=4>kEn;pe~pS(mgcKncIfW-LQv(%e@!^ z8jrZgWhb%yvar4BpxpJe(f(F^O%uj#Gq)8NvOv@$usi|Qnl5Ye>G(wZ!oaj3z4ORs zQ9VKNewmtCE@VTRC!|z!2!JB(7Th#hndUl>BjXqiJBnN%K6^FP%>LQSRFzvt$~Jje zPt$#&BLoUv3O}D51!1D2uGr_l99mp>Fo}YNrrpK@$T#YpF|eGL%(L-J0mN;(-%jHC zSR`{Vgz`r&@Z)uc5AriyqkEG2Sw|5^Ouq$8^-5DM(P_E*MPGq@fJ`N}H9VL3h$$a1 zqVuqC9olnT=slmNe_Rp>ckaekuh!Gf;4i99e@DovSiE<6)LLJ(o9sK-h7NAOl1|@g zo{mVmyCW@w1{hDg1Wo03^_PvQ8&Qq;_aY-sH42YrXgjaoMNKXJMQ^uisSL$su_1kF zlIE<=Sz;18e%W8J()O5ICJHgAlSfQGCThyQ+~?ey302O)pJ~z-H!NyxX||lOD=%fTw5ky1cgCU@ar& z>Ga8AdR_5emvR9Z;OS8hv;Rlnv&>BC_TUSArS zSd%EVp+F#68ER8wZ5fFiS_2I@qrSTqO!IA~kW8|CHa!{5vM9K!$sX*SAo^47=38WR zq>DW@o$mGHzNU;hu+=)tL)|w4fbMP&^ueIY6e@U{;S+{T4GQ)4y%m~Mksq9hb$s0y zU1Jhkf9j%q-tP?^V?FTz#GvQ5N}etJ0Jk@XHX62jfeM3O^%fE3u5-E!{r*B^ z4(WG{!hSRWvncM|F)yimdp~b`@bNrZnb#jHwXM(-xU8$GM>JC>bYtwpQZzTdJUSz{ zR!~E$(!|IAnuR&`xQODGrkRpo6Pve{zY=R=p9 z9k6=rx--DqC}RAqME}FAuv!f8HN1@KJ}Rx651`3HqgGG=eo84&=N0$mG5l6pH#;#Z zkaFvH1psMkDW?i=gf^PkwRR0KGbS^`Y~Pi++pAWY16?yG%x;3Za?VFc&}kwg^fu&b z*qJcF!o(1uvf?DiP$vJNq;582DHtH@|D|x@15&g8+661ICWS5 zPicp&)=6M>Pxm3+$(Ic6(8}mGzaulO^Ia5AGk~ucPG$DSA%co}RhKkUP|8ra{~&r8 zC*)e`W(wo;{AiK-(5b63n@+DDb9nPu2GDP#*r0p&NpxYGd-L1Zsr^Tz%;J-(Y?CJ^ z+?FGHLFDhJA+0Sh$lk&Fwn%<{UVnSn3LIDM|>CVSLsEM6#jx$fSFjLqtgr0_%bA`GN`%=>{HUnwDTF4H~`K5!I% zG+msEM!}Rpx)}&y)m&^Z_N=J+*vRNhhBnpAlw9ba%UB~u4LzK4N;``A<*4DBeRz7< zg;`+v$$3}y0I2ANjC)_n1_r9y+3K{XkP)2sUFtv{Ya2johjxi+w)f-0gmX0OApKc< zvHuPb$2jBIRt#EdWxFbS(cVrcrW?SH5x`Lf@%o`LJT13B_~jnAbsI@Z?h;Qoer;A5 zT(@H$(fj?JsyJ1tn4wD?fe*aH%C2jk&tsh1?UvQ`nF^#4QMC>|EQ&XMv;UHQo zV63YFZN0R*C1^UndOnOy$UKh}r9^x-LP8w#Dub0wzlv2hgOcUDUZlBBpyZ+ma zm6=#Bk32vL-=QA%7OF8zlL-!xnm1#;$Z5?^?{<+UI{Tn2J9Tg|Fw?fut-DYBg%bh5 z3~Wf+2|*5LY2T0f#z&dX1-atu32Iru=-nC-O}48;N$=peIC`!6go|20BRYQ_i_F>C zV(2$HdD<6^4~<_5+O>{})wr$Pj2`Jjg`>4XwKdbbUDI8ixCgt zHk)qS>FfAStt&@e@=ZyXG``cxhRI+cru8B?kl5;CulZV61ncST`eUUOrBQCz}KuX@J ziJqb!(P>rr)4@|@SwHLVT za*WcZH!54Rw=>EFZx|5f2_Q7$`J*ZGpPNh6f2|IfsvWjv9CTrC?x$2pEZd9!c(t<( zR=P_(qN_$&z-TM8YPnrxeM9wY9m`(xY-c03Kbl{FayT`JGqlt`1Ktgr`^i28zTf>;}|Vi1M>H?&JM%P zJ1TwIa~Ag4h>t=n0E@V$%MfZicx%Q>ck}hSUKKYzF?T?gPMpfExvZ}!0%dnzCVSQ? z7t5b5o_jWVXz=LI*E!Y_p0~A=-DnuH!+!?ZZN>)jW(1oq*j5|pJGAQmW=6^R`Rug3 z8LOtgAgrNJGN64-zZbKnCtM6|Ti&{!rn1;5^ez4QI{fW@I9CFPg!BO(cFJk zKnNIowpeGp&J29HUtPHh=$8bDo*c+26XPY$-@}7wF4vsunLPwP2+YdTXFC+IYoCnz zB=3Td^r=;_tJ=Ao{>W9lXG+4kB&goad(;pMDS2!b`A!LN>3Qy4$|wzKm90Bikt1xM z-uJ$v&*c&&Dy{nOC}6cd+p62uEP$!?E=<3#HT)W3sHb>a1VC?xPj<+P61h!^++&_e z8lMB;N=f&u0cpT}(R_=nXcdB*F03z_D9A`#=y+mabH-Uu1)|pm8cKjS?g{Gtc5A1@ z6ip+6aKfPtBxRcC3R@l4BgnvV2#q)(wx54?ZrWTI11{m z;Z3A_Nw$%-fE>Eh<2H3|vMLcTw>Y@9z3#=>Mjft|i8}*~jZYA&NC3*Trv1$WsaQCj z>`B8WCV47lu}LFJu!w6I7s}CZzD-%hVf|}NGRUe`<$eQ=TFv>liIZQi=y^Y;gI3$! z4`b&;usM=5$xvS&$Oi1^;FRn-GbpTdH6i~4>$Sm@qxk`$MI?{t3|D}9)>>`om6!-Os ztQSZd+&hBb?>E+?u~F|xGjpq;spX1hy@wDh3CgGTviSc*PV9OG3sd3v(q{*_z#SNB zv6vGWYzRfl2O$CE&aF3nN;87MZp3!% zowN|`MlUKd-(*TfVDjGxZgkliZ5q`*cz>O!t9Pi`4ITjXC#sEDiwP`T^5tLc0YmNF zZX+;8#4_!+EM5ra5$PpOmD|caC|_`fX52?BWx6D)4XmzXS;nu3;m2 z?BCXjmK}uJWeW~Z5%!DQ@rQB4nhG`TrodsMN0or)kLAAi*;$4O1SiE3VDVehwlDeh6Kk)}OYG2%4BP#hX4FhTQ5oPdY;TNe=+J z(qa^dSr3{E$rDQtTLwAJbs_tb?5v~;=h#9~#WK*Nt-a1YZR!d>A~%T&<$f!MI}8H= zjFzsKZ5~QYgP(mUOd$OF;E3+651ujLHNfa5Ek3IBQSiOQ8JnW) zEPq5!o}(LJx@{;rCQinAIG+hbV{6u8Z;-D+!=TPl6A%E0Bu01p-k7+OzpW)rp#OnV z-(ox4Xo}T_*0My1@S+8lM6Q-f?_!r=s^+kkA30jOxKhMSL!KPN?&9jkrA`p-0$PDO z(4R`CO~Xewcg^k$9qK=sLGfW^on_|%x>+cYs^>A9benWipXSWt;4#j_GQ^ z!DbbY#wVe#v%@bE?)W{R3ip%1`$U-&1+W6J1mGCdkk@2EuWw*lKz-p4!TH8t0MpBaoFNi`rcayAF41zhP=nb= zU^Z`4%Lp-|+*i6Lo#HLOsDiUEuldEYV2o{7Oy<1L-r@Q}q9d%JZG}I(9|$n>#eb=1 z`q{Wg4j@T@?wu)MD-T2EirLO!tX=%?x{)C17=_B9CkeokSZW!P7#J0P1l)~KB zpv~eq=NBq@j8x+Q3D_0?Q!u~bNA}WU=S&7ih(R>>tpujTzhos3zbN*@Sof=7pRDec zy{1Wlb=gM)cRn|GT4NuVaC8Oy(BS7?T{$$No<)Lz7Pb`B>{MRKRWNs=&eI0JN{Rox zSRIkUFDFHG;dZj!DVXY1p40(fv&r)#ZZ$xE;~qKZ#)#r$PQe}88MM!DUkL+KLJ1W{ zS=5N;%`?MX`W~F{#rA}%OrtWN0pmGTq?J0fBe+{J-uPjr&$6tpGKy)$xF7R|X?hkG zADCQ+Q|Jyiqr?X##5g>Ug4lPJoAxaDMWtC-41*r+2r3@-RD{S-4YBD?=((rR3VPEt zD|Ho=<%F^nyJ!rLWav_91yufYMufP8<^y>JZlrW}_yjI*Y%oM|UL{lU6S=LTD6v zo43p;E3y2|8sAL@xrD~!sH7Pnj8F6|YY5nnlt`R!+Ul~3`@wZHU4?EX6ThJ=3gb?I zFR?75{Qdw`+Mc*z^)qzhv@B_JL?wDxDMP1>YBYA07KHxTbon{MT z5LIaO{%=HhjnQZb`_(Xi5XKvy17C)I0Kpbm{S^1WM-dK4TC!Hw%p{+;S=Dotn%PgI zMv_@nyO-9~Y4PR)kR$rq)+4&ZB4j#0{KwW3*Hc?{u4h>0Aph6M(A^&sd;zUm* zFOq)2FaH0T%himfQWmvv+E;N@Rb+sS@`|Ui5VESIw3K5+aN;(0m&12%*EX-Q0JLKx zZxu+2$f0)w)(#OQG|5F`T`r~x-Xjlsw-yfK8bZ6&WJ87_Cu3By`S@F6w6e104COPg z>h~?8%yC{xS#3r>P z8FU#hnfA7`_DrACPXpji=ll0jQXzkdR!CXpUwa3nCE4B72x-;c)D*^Ucko0MD6vx) zAN$hDZZ5cQtK6AR1~u$xoDYQ#xb@71+m6{Bu72Cyjyc#be{ioFwa}}@8 z+`zu~k-yY_Wa?fW|FigiEVL}=^H+$|W;2N5*|JQ8#HO)}z^%=7R0Th{2PMErJm+LY z{A*DY*FM4s!?P-3i+HJ-cBRy?8z^ zgRbUz{1QtCjj?z%!oKK5S*Sb|RMRxSzlWdk8f?5cD08acD+0|!BdZl0BV}T?YqWDQ z@ts|v#eNW$e-&3gI1_PGCH|ck-+o>qnew4dh4bQ6(y`^SRZFIR zW@V`c9Q9f7M;KqxF@Xou%lz}O-9^3OYX>V5{w_-394M><6!k|-kE3=(U3rCH@zvdn zIAfNP=A~H%uD(geYjWbG$n~ZmsZUteL$B;dpz-#!ke;Dou+TQZ#sg~va=}(UdO1V) zCc}%w9^aUNw3H=p?gw!*l_}+xqak3Jl>ECAPT@!>?6H3_BeEy8{qOhdqHj&-KOdCk zhSiS=G}Pr zAe8El7&>)8oju4x^TyHD-WB_vR(KqUc_(+Z(SRTk;@?Bm_Ll|3VbwD<^3JrhODAKK zU!!T0K=c-A%->Dq&_?kTl-?rg)dFb*)l%a0qn~mpVVLPQeguK{Y@`D9Wh+q>Whp&( zWQd*-VSgI1YJTo3$;%V9+~|vh-+wYT`vl$|+w*3|9<>v1TACM^xkfcKfV55Y+3w=e zt!3eZceA)t)dmb?ZT1SY{aX~4pg5hk30DjjqouM1Bez|V4sPi$h8QYX%mok&i}CyL zv~D}JB2t}jcJk(hW!|Ht1_w})8cjS!D@85SS{2GvB`85M)KKTyOk_sErlN|Xfhhh0 zd2p0hHB;oI&qENxsg&okN=UsPSZ8Gsxu2}K3#&f@F#4AO;>(BB^`+3*;UI@zMFh(* zz;`Bu%kg*<5s%R4Q0CAjR$}!aXEEadbVTMMP|aKi$q+?dx+b26&cDI012WoD0ePm z1wK_c`=dkTBQS*hvmUI5?m~8>JQD20bO{_J0Y}T^?y#6pX^Bsx-a#QkrOx?#v{r0q zzo@g*k`%zz`{<$ca|RBP|FOrddV=6Wn5HrVdyV>`Bz0MX#sh7W77Fx2<7g%1fX#fx z^C84UK^orCnbONGBFXPQFXLhM`lZK;o`ysq8lKLN=9F6iC@wfC!x%cJ? zZb(j{#*^GkJ$qYp3S`0N{Kpa;{m5TxQGS~Uj%*v8@&$Z8M=%bJy($^LewPYt!iS-G zHV8{jmD~<&di}T!`wJ&=kXz$xCaJL83`@$M6YPxZC=eMtWgu*8k+_FD8H{!~QvH(~ z`8&3rC)^~f#A$te_|ajT>l+!odinY5qim0DZQ4Enh9SrIq3o9z@C@&~{MkT2wTYd( zH^QoGgFYkll7fE_)$nsv%C=Zzer_V zBld|>$TGiaC=f58RYLF47{d1qq*f1nF0qTZlHn1+!ir_9V4YxE&_ZG`tGsby1P57g zt71XCx1hxG2@Ev3y`tWc&sA@M&%D;aemm?Cm?p-O$@15lah;zavLI?gRT*x}ARM>` ziMynW1brRmRF4zv#>pDQf*Vl~~QvfL(IQ?J8L9evN2A6<^WfQ%cg ztN7N7)_wbZ$@&RRk@0RxQ|_XzIV(t7#OxV+fa?(+=DV{D2o4#RPCNYU*r2`=YG!5s zY$`W?O`g%rXrGd8U=PZyH*U=9@Xc%-GYpx|Y3sj^p`DW>uc|HZD7u@@q(&thtV#tO zEF1Ma)@WgNP^&ricqmZv-I+<2{fTZED7K1rm-ZApf$a3Q9ZKGO>-*w=Svv%Mj(Ydz ztm4)d3mnWQZahY7E$^>YWNz#j77Iem?cHxJz{_5L)_=Xx1y1Do?f7RB6(-nfP%NQ7 zk<8pWFDlCI&5e*NBAjG-Pp+Mt5SnXS1k#9IrobEui>tvrU+0tCj^2rz%%MeZb8pn? zWDp^&Ti_Xjlos-uBfTrEC80eFz&r@<4W}h#vbr%Nw8Uv>2_Aoai6a4 zzKrGp%ZI_aoE-xVK%G_#CYYrfaLngP`XTMt+qz2FFpU(DL9%jsr91A=$9zsFd%(f2 z$}JcW6-~0BmMju}V=7|)SBot)U(oZ)^$JL5tt}Q;t?>PqQ}9;ni-((WeBL|Q%00#E z-10-UsKUHGKQgzG_@T|J=7{E95T*!uy56>1h~Q@%CuAncg!kyJ})QdrdlwUVAVBW_v_T3_pINK|YYo6Qx8 z6%Dn)*h5RajMkbq99%PNj<(5AFTo3|wQ%(6ve|j#t?8n@WXQ?89+-wO$hQM6f4Nx0 zqbY@}n+qq6p}H@pSq{ulffV4e6iU4TC52?IroOq#N^5|RyADJHH_qv6c&Sjg03R&cq!W5T|dvY=3Wt)H=$)I z*4_Z2iR;Fw17Gm{X8 z0u`Xp;7-)47_W$XY}Sh#-LA`z%IX1K*x1+O<}6@D8(kqxGw-Z5)*$3Wb;i2>1ESmB zI1fLV5Y6#UJv9aI9$Le1v>)a><*ilg;%y`k_ZYtW=FVu-9ubgQq*bjXk8gIj7 z&kHXesXaV+P@L*stX3|D73?C!)xN>Yu6iyhnA$NFbF80z`LP)7St|Ju(|kmnzRb&g zfp>};{D92~lKCD5nW%3Wh`cQx+6l`R!pPGYa6BP zX-qwvb=g7ECXI;PO>4*~2&!9qlU?^L$%;_85PK`kiE2O|0PT3;o$y(dWf(R$`hj|V z#~p==#{sX}8;)s?r%Ht=tAhm3U?x2yAc}4+lh1u{H#NMeLNhzD?Wa_ z|68!KY`n*{PF3Oc0IBuR<$D-^Ka~{Xaj8b!WyJEU4a>Or{{E%63 zJt{o7Zk8SV+JJBfj%Gg^r|ah2w3-@g}sXjN2<2qkQt392^JILt)}rm&JR*%zE>%zeJZ?bBjln z-nCK6B)RX5ObZ8SX71`|+8{Rkn@a@>NS)@XGBq!O%%-{$XkXhqD+a+@3W39Iq|sD2qY5@Sbtco|F+9W1HO1r;N}|zq&*oy4xk&Tj$yUjv4~Xn>avfaXX8Ap8WejF7j1vpgWvvj?09A zvWqJ1_NtICM*vn589vqw>HU{UrA}#%cKgMCTii~0rlVtmg3|ZU;p~!Zti8M4-aBCG z3oKa9N$lDBI*ZyFLE($;)B08<`kT_GyXxmhN$)%~42wEGerB`x}=G5xzdhK{{s zIEz)EiHKeJq5#X9ULMJoS%v^RK*YbHDfWP*wNn1POEq5ls#-`Sy6Ni7D;nK)zlI#n z#KjOk+|(-l%h4f0D-eNatCqa%9VJKWDCCGe&P;x`DiP?^m;3uaF~++Fjh3y@+%<tdGRm<>?7dXx*)yx?v85nr>YuNw6tVu@0_KJV$+y?S`A% zXeR4-_td%(^xbt+yHHK~si9Hd5p$cThRQe4^15*yJ&tTE*FX;WaC%aX)-7)IR_ep_ zcSUyJ9LCynh~oAO-ZH@on^^kWDCdgL8HK5=YK9OA5qMx2@}6E=-P;JLY!H}D!hV^n z1~wNm5(G{)Y3)ev09x^9mQ@m}SahjBi^@UJCMSIQCAOIjhJ@k18Z>s|2i+01`UD}# z8i*NZI!vaq81KUe?3;V$9i&_;!};4F*UCkpI`Vx9$y}PduzeerE*IQ- zXN?+36JuzzT+_@m%UyigJooJ8U9D>$-98OQuK&N=0BEsUddTr@K=m|MRa>kbVeG|DD>0Y4sudISQwZoP!orU-#SMLKtJA5pdM;7e+e`R(s;Jvm;^vs zGT5-WAb5ZthZ^rYD7|gq`z!(6wpQl~?-tqPx;2G2hLTWlLK|zUdkSx7NF6?p(DRc* z)kV>=1S8RGTysj<)hHdw>Jtz2h7fEj)AKi)ur_mz^DYspP$qH_5#M8Bga_|7r|Dv) z9{N}RqKjVmSPaZSerC=HNg7p7T|lBKbJsJ_q9NFgWa{mlhTx7MsG2fGQovN`mMMY> zBx+UV&L1E=8E{&W*fOP^9(IR~SiWJCBi9c5cO-Y)VseShLv=JY_O4KT)e4W|8b!4A zH9%hd&Q}24xfh1lW#22Aw)y{sW_-goSWZ85dD-rek5`<={m)uWHg|?J#?lGXZK6IFW0D-OM6RsZRYY{pF}1 zs2W~_kE9NJ-v4>qvHV1yDyKztl7J6!-otE(@1jl(j-=fJx~_nJ*o;4a504nMDNl_S zIIlba-EnXFFAJXb$pcM)YluJg?!*=DHKrpG?St-CT_6wbk3xM0MUI#}A3Lae-v^#i zdh(4epCr5_m~RRj%(hpjw;k)0NnExeS#M5j>?$6t)nQLzxUPcBxBONTG!T#D%!AEC z7o3cwjYq7x-A*YwDq}Wzk`tA(V7g%4Ky9$gjo1Bm7GvN3_nwgC52uShyFE2Gk4mmC zMz-2@y(4XS&rVzjp*0I8OEX?sfb1WCYGs~Va!yMr>b6 zPf4n`$_j__aNtk@z9Y}yN?+qDiXu#%U51?mOe~Bo50zgQef~Z{j(uj0tdXe)JQ5u- zI)vu(e)wB*h=g*5Ne4ccs*?O^f-q2Q%0vMk@FW#0hD8ZYxduRO`A$X|&@?(M2N`Se z)qGJMo{yLbScKUYDHO|)Yd!WW+jsXQEEE=K%lS=8Yctm}+2s`iYM=`|d+!EUnQX1K zd}9`g$y4GOjeqQo3_koy-UmH{JAs&!PKzttJ4pG^s%S}qM(nwA6ZJYxh-HptnXq`3 zEuB$w8YgRk@foS>3sZH}s5iX8&BTi#>gzjCtTa2c8Sv~JK1FPz2@sveszLl`w`?W2 z`+sr^jjHJ83UPKB2-AiOgA38I!Gc>tas2@FvOjQc~qagY<923+o6KR20=(J zTw}KvL3#X>*%>Wcw~Z-Rc5@)Xnquv)Y!NxN`a8g0I(oV?YglkPAfpTx+Z7qnbzg(6 zo4XuJJZmahEpI#=XbNRm=^;Cgtoyu1l!bGrt0|kB8<&#Xi~XsK;BuF+t=Z_Rx6z7z zdVYepeEKN5s0+==?d9{`Ffu)Eyro~8pi5ZZ+&e9 zD;Vqj=WUwJN%!h;>o^-?YrxCXjT@df(_1hz@boN>J5)vCGml|Y<`ewm*a6DMdrt5B z%uG9Y>1PLVizrTx>-ubheg=G4eV*IVY88jvJ>m$vz4ykaTyL|{!Z(|4H%vL?7 zd3;>6J8@=$kj+lu3%t+#gZ7KcytH2BFc@y($|@|vT0rSrsvQg19Mc#l?2!h;+Eq(R z9LuiOOJ7xI9<2v(G~Z z8-pA}E7mc|S}e&Ig0Mg&Jv;0*`Krqyv4K;a!N#y1}bt<<75`mo|q5t*mhD+bO?y-6VUG`*%n`dS3ibx z(iX)K15x${lnaluG-TAtZ$$>*wAPbJ^jjIfn?>e)VXOl8@WP^|fpQpiz$yQa zM_Ezmk?0~WyGMf?B{}uX?-X|i2a6h&{gePD;N}Q))aj12)&oLQqV>|@?(yQCaj`y% z^}mEVjo_R;>Gb}#k~QxDBEB^kLA@STaUQ@qa=gXCv@E*Tuz;Nu5)~*OBEYouiC4(J zPg#j}1>ev4N0nhcg<%5rcH^UFC8^*D^QP&^xa~m@r&dHUwU)m^g=c#TBnGlteJ65l zV!(l)wqtgz5QQaJCe3|GnBY=-6oqy7{tK8=pzO|qf`l0+bIt%~WCg^eFU<*Hz@*d8 zeR#Ac&pEn0pNrz@4U48?;{-`P{VqH^<q-u5UOI z>%dSEg~74gBExhaf4n1RhrqPmZ(|P{1?xW#!Y(iLDv5U=(T9L&G$oGoNZ_Tuwv?3O zaB8TH5t39jm+OFN`F<$b5Tu|*2s#mA1=N4DY|rw<^<|`jBqv4c-`5D-akBWk9}8v( zho9Gy$|K+oIx)HWBRSSPm?oJmQ@s^nlP<}8Rjn{G<2Z&x3}QwIGChUGT>Ek#qg0upcp@m2OFW|Uam#m#^{mCFZ@!M17vOM&ecm}s^(V|}&RL_cU@a;>jECyCW-A?6hP zaCzMuxF^w$b!?NPzf)cE$W!F~DLQF(Gx~wnXL5m+h#HfFG`|E%V|9_?@B~5uUjMn< z6P|yH3qOHj`?wH!!34DklBx%=;QYPW7aW5tB+?U^bh_FI`#-jp1>$Z9f!e8sqEfGz znWpGEqu^veCA58sQgG(;B4C04@LFqX6 zn=*Ya8^SA9UMl^bp;zP^;YoI>m>f+8PtN&9tC=#@T6gaeyvW|~F%|5J9b-PDdmVm9 zU(}2~p!-FlE^lk~+&cFW{u`K09tGgeb&X&lD%>7&FD-z!L6w=(6E-YASVq2 zG~tTA?qz+1o6rhH5BN*nTU;F{r~{1IDD(W&iQ0bqMpquH`*GVm!L0z#wKXU2DY;ik zspBz?H)|0$pd99fcKr(NZLVGo?n0~P97EXw{NpDotq$uN3l2<-PLrUtyahtl@jm^uU>DFb#yDmKX95thDV%93G+N~-&*_-zG&(<1oaXk> z@b`m=rEwPo74FC-Vey2AGPykfBTwZH&lk#LAbX1D63m!iGSDutkj+szv}i`}7}Woj zCzoUtQ8e)!UCg} zLwb!ob-UK8zF^?&7L7!eM$z%G%_zA8uK$P98_%yZ8cFO()b#G5*<1P2f7FV{R8QqB z=!EFbA6z)5ay3=YHoyb2E#JCMsWkIp(VY~5HPJ5(WGF&|6X}1Jg_FNgH~3=pw+!PQ zf+7azq?X+khJGmM&j&Xb2>tQ!M3v7Jmwah5^sufM;Vh$)gB zFmk@~jJ6Tv+(mH+1hV*?yAL8w zzighaiEvNS^R&ckW5I4O*aKP=mcAd>a!ysc@=7?x#FJ!U6!zY# zRpR!4HJDR_Ts`MGBSOn1YH$paDRW_I!>VCtMhrm4=}qNbra6}Ggwg(<<(z<Rkg=RD`R?}sL@#$g?gv5vCE788aAG{yI9(KK-`TNx?z8mE;&Md+oOI`{$d_WZOuvO zTKucg1<9pjopDJQ{Yac!OmYGaT%#(JoXsF<;E2R+JK9l^iAE{`O*rrN zw9%J1jYm(~Q$oAkm%!=*a$6~@7su(b`LcXDo08lCF}~{=b^5H%TBy!qXHwloqLt;? zpz94jx@~>B5civuZC|=v`A=TC!b(tPo`B&Y+jJ5&G4Fkh?vW5qkZx#*%liNqVIcm> zyNOzUeSp6!i)%L9d`5;Zt3Ms(xufW0LeT*?YpP8_;;L15Un$Lm&+6MCrSkY-9R%{cD;=d~s9(1@e$5q~@3 zMpR2&2=Vucl&H}3I*)j~ZFftJnbK0J5=u~3P7>kTyTpxFv)2W?cCKO(El9R4i;D*E z2xTmNn1bZaX+kC}OO$IFE9R;j5VwpR2(auVWQ-Ue5#cRqODcxY2%OAvs`JX4++oD> z3PpbMc}4sOT2a;EyZfbm0=WnyeKfFl|1wa{ojYjAO{SVbLHBXI3e~$H2eAdUdMTX} z?5*)dq3sJ{$m%EovaH2zMN)TO+7P5KqVzy{Nc03Uf-*tAz$y39+p#9-$h<7r@tkuI;%~WC3s>%Hw2PJtX@(f;4GYqP|@d0Hvl1& zIYf)LD&)G8D4S~od~A%44 z8(tb-Fl_>FlAB_$+|knq$sbT1Fe%riE7u^W0b^y}01iu>p%Y?vIHD5CeS(l8AxAHH zFc_ENE*&UE&wF(dRddS;Rj3uil&iof1xE)AXzZ|kgc^nyRhbyqPt-Jw7G)6^11&|X zQD)$Mq)Bb+BzizFRXzdPP8nv~uqJZkKA5-1#UZ?bvpHr63 z6{IVeq!Y^+Ahkosz3e`aG{NJYl!4MbBhqv<*eJO>)&7)K5*D<4Gu~FeMZwaeEtYKr zs7e3q7^C4{YMPRvl674C&6A-DYUi; zG&HV+D7k~HvoOlny#A$nsQDy&b>cGo#41P;p{fHAv1=sU?bP$Wm$`!Y1E>ve$xZAa1&O{egdSXDC4pVqEF({Bu0omAzcX6 zvnnsfvd^`kme_v+ic98NQ?=8M_^RHS_2Ggw;iJS35t1KsdBHcHbcLv?)UlIqz8ImR zf?~sOA-?;C=)!?6*1b>&a&sCr^0Bp~)exA+v;3^ka7*Jz+Z1Oqnn!iflN&T_K& z($cm7Cr}OOHvqN1Z<~Y-&(G1}_qGE^LfD^!G_iz0-5muNt4WMOJ-{}rrswe@ujx2R z{@a#*t~SO5wq|x25%BGVf>5{2z$0aDMG%>0ftG;;f;1pFN%KHJ!l11O66t`Zl5K;1 z)QiK@_;8TO)rRI2F;Kc*A>|aFZ9}+dfKSnnk=7rfK!A1#{93fFl7C=IUTG2QW9v5) zCSRGL58XLF00Wf(&&9yRGn)c42y0FiiV z1i!EFhXQkOm+tWw>H(q{xZ$$=s)vH6I8-&HG9)EpCz$Y{jp)VmEDqFO)am2nhI5YZ zK%wnj#>u6oa$a}Bk@8eI{a$4RQ9^_NJ_K<6ma`{>PPG3iu*h0pg z?+Z3d$Ib^ii#62oU2)`oz)9(F1o1#qQpVte@5};he`i9Dy;|c&MevD_m$osO&=aiK zfs&B1qCp*Ys&ruhJBmp#hB#8<+aICReB-He>0CE?TxXr=Jf=Du>ju`25`oRZpyR0` zInHWYe4B?;y%sl6$*^;82++zqh%fpB_?-aT)-LSdNv-W`YtW~mxB$s&ID42k$gjXW zY2e^y;_Kit_1fKt{mv`S}=%Sai|>eOOpEB(4T?^ zt7fe6Of`Dd02X`t!P?N4d~AfCi+xeJC$SF3;8!cfMb@z<;le#?`xGvtS&3lKkc1ce zP;_WdpdGnDh}t`~eR4cOGGUu-6T={1-Q}BPtdAKaQE$o-5h9swAgvgeiz*yg(s_C` zL3T6*f>Y2L}>DdO@=0OIeP4Uc^Qt?3Ft*jX?y_1TZd&?a*QgYK$ zR+TtLn z{pe2k-;7Nbn#mhx0uYosi0|y+f-eyE!e<5g7L8*9Br%iyM>Rv3>ts|hnX5luHd$}b z(hG?1#~hfUI4b#Qg!G@E>v=~#J5nN_87t_7s@|H2`n%ga3u0b9=jL2f z?SpAno7sr-#gVzGF0K9wt`7UBzzU2^O%x97K|TpkY&*TCOvuw+?ifHx568@;sD}JB z?b51D4L8B~y7;@cn-cDLZ`@(Il4KCn{!+{(k`+1BI||@Oyrl}MH=45?0s0k9i7G7%#pm6% ze)Hx4NmH9a?z1v>5WG5X#ymY!JR})5UIW`|F|#Mc@N5iCdSL>xJ^c^=AHiNkedNVq zVf$k?HV)#GqM;26r2N>*?&&Qp!hjK+MGCSP1;6{f7vdGhJFg#p1zaad5i%5nV(5plEp<92B8Qz_K8piTFJZB8$Y=#o9N z+zSKp3AN^FP*`rm1W4wCKwIP?ELJh%$a^F-m74#>dKQeIf_SB~jiEF;A7p1^g82pk znHNZLh0UeL?~Pr#gMm{A@ayWD6W#9LcQ_O2ZyPQXbX850PhMW*ZjHPrUb{FDW{i zMY&r2VwV$w*y98-pF&Jf>6r6|B74xG=*NDy+GC~azqD;^sQtJNXhdVqUdUpqg!IS=(R^^6gG+Uf6$2!H~sNN?*80UB> z)19;^>j`-b-sFeh=NnZ7C%N+iG|X@m59VAX3y2o^&pXTFQc(jm*qZG^gI?7Ub9q$Q zPLmH7iwVU#7xuyj!5CDD05jJ4Yu#Uho)Q1h?BXCW8*H9v-Sh`zHK6A!R-}4I(n3^bmNhfDLbt*3x7aroIypHWn6?42FEcTKGU&Bvu1FF67OSpB;z2CB*UQ^@M6I7V2o*0*IhQsqptnY znzgx#hZ~sB_1dLqc{mn{)PeCz()2cWo1r1k62DqHG1!n~8|x9+0a!w92&r*@K{lBX z@=}>i(nvac3;21rJ3Fm4N&~H%;_g&AUw#R-_#yppP|g~pc}F*jlH zO4YZdTby!(mH)F;q46O)fg~VV4dl9QskM7_k*PblJm?iCeBmo|S$kboYDRp%D~Sh} zElLXw(MHvWYJyM(weWh7*iwBo{5;l=C*AA`A?Ar>Y*J_`29^Px!JO!+$+G)G5 zzPFru+oPO;%sIgWPBbg}s+jN{)kvoSDVc_!Jv#HI8wh{(HBC;O-7l?Pz9j5p{GdRKogZQ5+w#r)&;L*QpXwEIf4eq`n5G$QRMhVYIGmtbh|bLS<#{ymWDs>z zP5^&8x94s{(f(too&y$Ir;&w=CXxCQ)A$}C6jx`#1uWFKXU!zEpPkbDf~xazBLzh% zM@Cn)XL?2_LT1zexD5w%SkN?+HY-rkG1=sr;V`zkP@NTxcmy)e6LEs&k>ez-*8=7C zzFxB6#shQX{-0zyDl`OrdF28W6eI&ww>>l51{-lW|FYIK68kv^RgY!&_LudvXEX=2 z$Gso@+ULbE9W^)pz0as^vme)5|6^-4k1jTYKP??p=o=`rn4ZZM-ns8I?&RBt4F#BV|4S?zibP6I`q1rJ z`}u+>aR2&xOuqP2JlTLZ+r%QLyNP|f?Qx9w`|hg5_}J*?Y8kB8yy83WiAw@eAT#^` zs~u35vb`&er>$K0XRNB+!JA}uJLlF&X!y#EEIk(XXXFQHB1Q}6OgubxoUBR-rc@sC z$)P*f3LAl+nHx?nZ2|5LTSl1KY6rbKKBHX*5II*b5O2JI`X$mEI3y8~PGFg&hRg60 zvFaF0o2rw;=X`0h==8NqcZaOzywR+lcL}@q2eax46|jEZyY_sccToZ$+Nj0u=s^&3 zE5aPt&0af?+E5Xp5#y>xm<3H4q3tZijAs{Qovh71pH|U@@qUINHIERuVY*2_Nuxz; z*Tf*;yeuYz0_@-wuaKTsQ%aTF_W&Z}Wxuew(F*b1*_Dej(Uzi^bh zmzog&K7X*%w`0lc(9M}DYl{ZE^OKlFA)JAy2I7OT;^;&tfz84EK-mdL1LOf%^eJaT z1p1WQP~Ors4>R1>^Z?SXG~$ksHw(g!fvwDA*F;hqboIduCqso74$=@o=r@-!qD`Zu z;D~`n&c{CeA42BlXrWPIgpmZGT_<`ASAl(Q)c^Dbo4Oqre69x&KU;;j10?pSBp|31 z?ubM;O|R-=)$;-YiaGe+wmlz-^Gw|{#6kUr#ZzHD1~$^!39zh8cI>hpHKLwMigsSrD{}#+Ze5UU zsjk-SVuJL{P%0kctfFzIwJ&XGcCb>e zCvrs1W4lDO7&eBa7T}IkSLBytcr$N*u>0=KRAC1bI`9yJ^bc<)r`UiQW0qYF4?2(% z+ApD%MD;$Zhpw@MRb0NjqtM;BD6s%`*um#AqdYX2Ny+1=^s?NdZ~3>FyGgSQxj^Vz z&??2L(3MhQY&;XUz(N+yLW-Oj)@vqW;XzQ7bP#jqzLsUGLJO)Gpa=L#aSte3}2kM;vtGJhMczKrzX|nUWty8wCEJh zv9Gei>DYm-m_pA*sYrt-WLr1w*MJ0_VQupgMwwB|5^$t=@o7f2Nt|ZXMv7Jbv!!#c zD!}M~76x+D49}S>n7V}(VONv?@h}hI)>qNQ&z2T}RB=ZgLPNd4gHI%9MyJ*D_r!Gy z7kISo@wj73#_)DeXZW}gMPJ1}FATX$R>-p!LQZuGwreR&!Z0)C z6cC?1AY*mWB~z?vMFFu}k?#Z4@)WWzp8}>`_Z*Y3A*L2Y!UqKD0p{>1@dh-!1aWgH^Oem%q|^{5b~Hkel$9WL57-_46Q! zv9dirRvS?O(X3wCF0&!4Q!aEDnO{j-=%VN!O(RRA}(Uz!#hB_O2d5vBl36BV+<5K>ce0uq__@orPJ1dsq=a3-&6 zMFb@&%bB!7ma%O9YzLpx!Ud9uP+Q6a@pYskF(k6lLHq}dtXia9BSw(1VK}#3F8Cuu zgZpkuSJs9R#R@l+;;Mo@Mom-UmgH1cQ{62llg zkcJp|Wa(@n&(DDLXG2uiz9ir_A;#i@4#Zf)w>@yY9=ZdfU8gYqW_Q)A^Xa5}pwf^C zrUaxn-_VeYri>7xGoO9E6&+81w8T8i(X91ZcQwv}8IP41Iu#>3 zML=FBCqV_V=}KEK^iiz#^BD& zO=Rxn$X@WUpBOg-vlesbko~aDRZYU6bKTWRq^}Bq_~p^X#qYP4Qv+R`C5ri1avXso zAOdCU&XEYE(*ghH*`|_ZQg``(W-~{|;FqO269}2*1N#+lry77_HEhG{&R5bGFAqQ7 zMh364$B+-*R_er#ZwUUpk-`A^QYAz|Z-M4hf9Jmc^so@0Z3Luc+Tmh3* z*#$P-ADQ?R8sy8;F#X=CqVF|^fvYHEW%B2u!;eIxh~eJDdb|@PrR~SyUYaI-^w*uz z_c=q?d^`_FiMfEs=RcVE{uHS9-p8i)uvciL*!4qiu!GO15Vm*iCI7-}czrLTG*52K zRoOx=KU**scHZ(M^q3=@hQy?Ubbr+wn=ox?H}@V90*^VMT{bY3zOH9)TXvf2(9 zcTFaJU%=7yBR=vNpl8MzplpvsK__I+*YakGoz5oQ5-GxA12_}w1W1>pB}}9+r%Y4! z(>}wSBB{YMKhcuf16x8%XM#40kOS1z;Al`*@3@3+^j$k$vK3qG%-&WY8_x}u6-8EH~w<6!@3DkboTQi?hPd^YuvPJ*a`Y%JF})l;UPy5s;1 m)0=J zV0p1(=A!$Zg*5aT|luMa2Sv^JvAq6%QXbvqgff{k(aH8RQ@ZMl% zsDSq4<{8ttQjOu=VR^A=Q3g|x`mN=%5TXLJa;{X2#GbK#(qyx>mxh-X#0BU@f$UaL zR3D+AH^IDkvKt`Agsc{p}Z$T6(0ZRGxN@ zbRC_OrQslO8{{+$R91{2yA`S&fJK75$^zW6MONoN7;~5rEQ_bq z_SE~tcvjetYaOn-ia?O>(4lI6t13F@u-sXq5_$oFUqzBMq>d-a0J;(@pNGhj6j3;m zCpHrT2luj=J|)M1p>$309j|oug1oLEHs&Ed%c8%yh)g6?#^e;r8e`NDmWmIfS8k?MZ; zc6>CLKZuiAdxP#YK(>RYXmp^u#OMKmJiY=Vx!;t=6n1wv2KS_rf6dlrRkqLQ@(jdp zN_eA|unhAU;qgAeM6oGhKBo5TI@X`dB-&L6|^si56d+L%$sD)G`<;d+A z=__%j@Hj6N>?1gP{*qs$V3og&JcH2>)NQtNgkwHSo4sVB*1YW_q5q)Ze(E<^CZ)3j zL^4jwvjf62X^lyF+wDTibq-@?na2UEBKwC!+B1i;ut1ED+0s;qWKVdpM?#8rNK3(G zevW{F3hrkN!Y_GXAP>Qc%qh4F^hS*(5fR6%n}@dd!H`L)b&YXqPTnSXCu$b+VCG;o zD+r5WzI5?!$tCjKI9>qg0Y;GJOGnz>^+WjCi|cFx?Dn>QDJ;?#NW$Ro$~!RcY6eV< z+lI!^(|1-lfNMAqNoR0nwt74;K|+el0!DyaO;i52s}54c`G%OqE~sFyMATq+A_oGQ zban}EU@7V|sD>TwTo?B-m5`c3^A zSUjrVnzUlYI`?gv>3rPAx>;^;!r)Bv}^FODQdG0uhdBj$6!$ym`$g4UCDcQg6>yJ#@?m zYR-@hQadJFdPsyKg?ZFBgx+)*rkyv+WT~i`s2Q?XPUX9I4;AWnxT$cqo(amtIdYv<}aofT^K&=r1CzE1kih~-0zTY2mG0i?2UmOJqOOocmx zlI(dA5kX)=@_32{g*w$&e15DiZXd$aO`rIyiJCs+%+O0?JVMI_YmS1GKL3k$Q?dJ1 zO|b0F3Sth>*R#7qJyK?8XU>EIdrmCi>(z_9Y6ZTfJi02sd5U=jXQkfLI=cL{`ZxIF zJnQ!h!evp4lSfE7b;zugV$TtGf~4AzMlqmKK-TLh=ut~Z^Mu@-yt;*!4Twa(B2u#> z-d76{Sp6C%=-?`eOS)=hZ9>cat#Ripq>h60`7;yk3ngsx@j)3;CSjs>ry|yIr(khb zTWf%>E&O}hro!+yN<8^CqTO~E${rZ1<|D0SAYut^Fb~U)O#CQP3h#}v>I%LGtkazS zEe??vybL+eil$v;1%Za2zKRQlVZ;%ZNm;M}s5n zQ^gd535VgUAWE1HF(K;)#$~#cAYzp&PG0ZolvSi0N6Dxn%cDTP;a9`h&nSnU#)dr0hp5H1$(l$usKqRCs-o#hYBslp#Y2^POz?3Ti{3B^=fx<5Z5 z2>|W>`*?}$%CV{;8?;927m(;!fapxP!+n>`8dz5%VQs zF5C>6zW1@k9u