Skip to content

Commit

Permalink
Rebase on RANS branch and use common methods from CRAMInteropTestUtil…
Browse files Browse the repository at this point in the history
…s class
  • Loading branch information
yash-puligundla committed Oct 26, 2023
1 parent 1f9c2a1 commit a4b2837
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 78 deletions.
11 changes: 1 addition & 10 deletions src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,8 @@ public static Path getInteropTestDataLocation() {
return Paths.get(INTEROP_TEST_FILES_PATH);
}

/**
 * Maps an uncompressed interop test file and a codec to the path of the corresponding
 * compressed file.
 *
 * Example: uncompressedInteropPath q4, codecType r4x16, formatFlags 193
 * yields the compressed file name r4x16/q4.193, resolved against the directory that
 * holds the uncompressed file. The part after the "." in the compressed file name is the
 * formatFlags, i.e. the first byte of the compressed stream.
 *
 * @param codecType name of the codec sub-directory
 * @param uncompressedInteropPath path of the uncompressed test file
 * @param formatFlags value appended after the period in the compressed file name
 * @return the compressed file path, a sibling-directory entry of the uncompressed file
 */
public static final Path getCompressedCodecPath(final String codecType, final Path uncompressedInteropPath, int formatFlags) {
    // <codecType>/<uncompressed file name>.<formatFlags>
    final String relativeCompressedName =
            codecType + "/" + uncompressedInteropPath.getFileName() + "." + formatFlags;
    final Path testDataDir = uncompressedInteropPath.getParent();
    return testDataDir.resolve(relativeCompressedName);
}

// the input files have embedded newlines that the tests remove before round-tripping...
public static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException {
protected static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException {
// 1. filters new lines if any.
// 2. "q40+dir" file has an extra column delimited by tab. This column provides READ1 vs READ2 flag.
// This file is also new-line separated. The extra column, '\t' and '\n' are filtered.
Expand Down
76 changes: 8 additions & 68 deletions src/test/java/htsjdk/samtools/cram/RangeInteropTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
Expand All @@ -20,6 +19,11 @@
import java.util.ArrayList;
import java.util.List;

import static htsjdk.samtools.cram.CRAMInteropTestUtils.filterEmbeddedNewlines;
import static htsjdk.samtools.cram.CRAMInteropTestUtils.getInteropCompressedFilePaths;
import static htsjdk.samtools.cram.CRAMInteropTestUtils.getParamsFormatFlags;
import static htsjdk.samtools.cram.CRAMInteropTestUtils.getUnCompressedFilePath;

public class RangeInteropTest extends HtsjdkTest {
public static final String COMPRESSED_RANGE_DIR = "arith";

Expand All @@ -30,13 +34,13 @@ public Object[][] getRoundTripTestCases() throws IOException {
// compressed testfile path, uncompressed testfile path,
// Range encoder, Range decoder, Range params
final List<Object[]> testCases = new ArrayList<>();
for (Path path : getInteropRangeCompressedFilePaths(COMPRESSED_RANGE_DIR)) {
for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) {
Object[] objects = new Object[]{
path,
getRangeUnCompressedFilePath(path),
getUnCompressedFilePath(path),
new RangeEncode(),
new RangeDecode(),
getRangeParams(path)
new RangeParams(getParamsFormatFlags(path))
};
testCases.add(objects);
}
Expand Down Expand Up @@ -109,68 +113,4 @@ public void testDecodeOnly(
}
}

/**
 * Strips the embedded newlines from the raw test input before it is round-tripped.
 *
 * 1. Every newline byte is dropped.
 * 2. The "q40+dir" file has an extra, tab-delimited column (a READ1 vs READ2 flag) and is
 *    also newline separated; the tab, the extra column and the newline are all dropped.
 *
 * @param rawBytes the raw bytes read from the test file
 * @return the bytes with newlines and any tab-delimited trailing column removed
 * @throws IOException declared because the backing stream is closed via try-with-resources
 */
private final byte[] filterEmbeddedNewlines(final byte[] rawBytes) throws IOException {
    try (final ByteArrayOutputStream filtered = new ByteArrayOutputStream()) {
        // true from a tab up to (not including) the next newline
        boolean insideExtraColumn = false;
        for (int i = 0; i < rawBytes.length; i++) {
            final byte current = rawBytes[i];
            if (current == '\n') {
                // a newline ends the extra column and is itself never emitted
                insideExtraColumn = false;
                continue;
            }
            if (current == '\t') {
                insideExtraColumn = true;
            }
            if (!insideExtraColumn) {
                filtered.write(current);
            }
        }
        return filtered.toByteArray();
    }
}

/**
 * Returns all encoded test data files in the dat/&lt;compressedDir&gt; directory under the
 * interop test data location.
 *
 * @param compressedDir name of the sub-directory of "dat" holding the compressed files
 * @return the regular files found directly inside that directory
 * @throws IOException if the directory cannot be opened
 */
private List<Path> getInteropRangeCompressedFilePaths(final String compressedDir) throws IOException {
    final List<Path> paths = new ArrayList<>();
    final Path compressedDataDir =
            CRAMInteropTestUtils.getInteropTestDataLocation().resolve("dat/" + compressedDir);
    // A DirectoryStream holds an open directory handle and must be closed; the type is
    // fully qualified here so that no additional import is required.
    try (final java.nio.file.DirectoryStream<Path> directoryStream =
                 Files.newDirectoryStream(compressedDataDir, path -> Files.isRegularFile(path))) {
        directoryStream.forEach(paths::add);
    }
    return paths;
}

/**
 * Given a compressed test file path, returns the corresponding uncompressed file path.
 *
 * Example: compressedInteropPath ../dat/r4x8/q4.1 yields ../dat/q4
 *
 * @param compressedInteropPath path of the compressed test file
 * @return the uncompressed file, located two directory levels up from the compressed file name
 */
public static final Path getRangeUnCompressedFilePath(final Path compressedInteropPath) {
    final String compressedName = compressedInteropPath.getFileName().toString();
    final String originalName = getUncompressedFileName(compressedName);
    // step out of the codec directory into the directory holding the uncompressed files
    final Path uncompressedDir = compressedInteropPath.getParent().getParent();
    return uncompressedDir.resolve(originalName);
}

/**
 * Returns the original file name from a compressed file name by dropping the last period
 * and everything after it.
 *
 * Example: "q4.193" yields "q4"
 *
 * @param compressedFileName name of the compressed test file
 * @return the part of the name that precedes the last period
 * @throws CRAMException if the name contains no period
 */
public static final String getUncompressedFileName(final String compressedFileName) {
    final int lastDotIndex = compressedFileName.lastIndexOf(".");
    if (lastDotIndex < 0) {
        // note the trailing space after "that": the pieces are concatenated into one sentence
        throw new CRAMException("The format of the compressed File Name is not as expected. " +
                "The name of the compressed file should contain a period followed by a number that " +
                "indicates the order of compression. Actual compressed file name = " + compressedFileName);
    }
    return compressedFileName.substring(0, lastDotIndex);
}

/**
 * Builds the RangeParams for a compressed test file from the integer that follows the
 * last period in its file name.
 *
 * @param compressedInteropPath path of the compressed test file
 * @return RangeParams constructed from the parsed integer suffix
 * @throws CRAMException if the file name has no period or nothing follows the last period
 */
public static final RangeParams getRangeParams(final Path compressedInteropPath){
    final String compressedFileName = compressedInteropPath.getFileName().toString();
    final int lastDotIndex = compressedFileName.lastIndexOf(".");
    // require a period that is not the final character, so there is a suffix to parse
    if (lastDotIndex < 0 || lastDotIndex >= compressedFileName.length() - 1) {
        // note the trailing space after "that": the pieces are concatenated into one sentence
        throw new CRAMException("The format of the compressed File Name is not as expected. " +
                "The name of the compressed file should contain a period followed by a number that " +
                "indicates the order of compression. Actual compressed file name = " + compressedFileName);
    }
    return new RangeParams(Integer.parseInt(compressedFileName.substring(lastDotIndex + 1)));
}

}

0 comments on commit a4b2837

Please sign in to comment.