Skip to content

Commit

Permalink
Reduce memory required when merging intervals while keeping only the first name
Browse files Browse the repository at this point in the history
  • Loading branch information
meganshand committed Jun 25, 2024
1 parent 127f3de commit d1dc5cc
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 1 deletion.
16 changes: 15 additions & 1 deletion src/main/java/htsjdk/samtools/util/IntervalList.java
Original file line number Diff line number Diff line change
Expand Up @@ -888,20 +888,29 @@ public Interval next() {

private Interval getNext() {
Interval next;
int start = current == null ? -1 : current.getStart();
while (inputIntervals.hasNext()) {
next = inputIntervals.next();
if (current == null) {
toBeMerged.add(next);
current = new MutableFeature(next);
start = next.getStart();
currentStrandNegative = next.isNegativeStrand();
} else if (current.overlaps(next) || (combineAbuttingIntervals && current.withinDistanceOf(next,1))) {
if (enforceSameStrands && currentStrandNegative != next.isNegativeStrand()) {
throw new SAMException("Strands were not equal for: " + current.toString() + " and " + next.toString());
}
toBeMerged.add(next);
if (concatenateNames) {
toBeMerged.add(next);
}
current.end = Math.max(current.getEnd(), next.getEnd());
} else {
// Emit merged/unique interval
if (!concatenateNames) {
if (start!=-1) {
toBeMerged.add(new Interval(current.contig, start, current.getEnd()));
}
}
final Interval retVal = merge(toBeMerged, concatenateNames);
toBeMerged.clear();
current.setAll(next);
Expand All @@ -911,6 +920,11 @@ private Interval getNext() {
}
}
// Emit merged/unique interval
if (!concatenateNames) {
if (start!=-1) {
toBeMerged.add(new Interval(current.contig, start, current.getEnd()));
}
}
final Interval retVal = merge(toBeMerged, concatenateNames);
toBeMerged.clear();
current = null;
Expand Down
44 changes: 44 additions & 0 deletions src/test/java/htsjdk/samtools/util/IntervalListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -702,4 +702,48 @@ public static Object[][] brokenFiles() {
public void testBreaks(final Path brokenIntervalFile){
// Only attempts to load the given file; this method makes no assertions itself.
// NOTE(review): the @Test annotation is outside this view - presumably it declares the
// exception expected for a broken interval file; confirm against the full source.
IntervalList.fromPath(brokenIntervalFile);
}

/**
 * Feeds a long chain of intervals on contig "1" through a single
 * {@code IntervalMergerIterator} and checks the first merged interval.
 * Each interval shares one position with its predecessor, so the whole
 * chain is expected to collapse into one interval spanning 1-100000.
 */
@Test
public void testLargeIteratorMerge() {
    final IntervalList chained = new IntervalList(this.fileHeader);
    // The first interval is named "foo"; every later one is named "bar".
    chained.add(new Interval("1", 1, 2, false, "foo"));
    int begin = 2;
    while (begin < 100000) {
        chained.add(new Interval("1", begin, begin + 1, false, "bar"));
        begin++;
    }

    final IntervalList.IntervalMergerIterator merger =
            new IntervalList.IntervalMergerIterator(chained.iterator(), true, false, false);
    final Interval actual = merger.next();
    final Interval expected = new Interval("1", 1, 100000, false, "foo");
    Assert.assertEquals(actual, expected);
}

/**
 * Supplies rows of (input intervals, expected merged intervals) for
 * {@code testLessMemForMergeWithNoNames}. All intervals lie on contig "1".
 * The inputs cover a single interval, two abutting intervals, a separate
 * third interval, and intervals that abut or overlap that third one.
 */
@DataProvider
public static Object[][] lessMemForMergeWithNoNames() {
    final String chrom = "1";

    // Input intervals.
    final Interval first = new Interval(chrom, 1, 100);
    final Interval second = new Interval(chrom, 101, 200);
    final Interval third = new Interval(chrom, 301, 400);
    final Interval overlapsThird = new Interval(chrom, 350, 450);
    final Interval fourth = new Interval(chrom, 401, 500);

    // Expected merge results.
    final Interval firstTwoMerged = new Interval(chrom, 1, 200);
    final Interval lastGroupMerged = new Interval(chrom, 301, 500);

    final Object[] singleInterval = {
            Arrays.asList(first),
            Arrays.asList(first)};
    final Object[] abuttingPair = {
            Arrays.asList(first, second),
            Arrays.asList(firstTwoMerged)};
    final Object[] pairPlusSeparate = {
            Arrays.asList(first, second, third),
            Arrays.asList(firstTwoMerged, third)};
    final Object[] twoAbuttingPairs = {
            Arrays.asList(first, second, third, fourth),
            Arrays.asList(firstTwoMerged, lastGroupMerged)};
    final Object[] withOverlap = {
            Arrays.asList(first, second, third, overlapsThird, fourth),
            Arrays.asList(firstTwoMerged, lastGroupMerged)};

    return new Object[][]{
            singleInterval,
            abuttingPair,
            pairPlusSeparate,
            twoAbuttingPairs,
            withOverlap,
    };
}

/**
 * Checks that merging the supplied intervals yields the expected merged
 * intervals from two differently configured {@code IntervalMergerIterator}s.
 * Judging by the local names, the last constructor flag switches between
 * keeping only the first name and concatenating names.
 *
 * @param intervals input intervals to merge
 * @param expected  merged intervals expected from both merger configurations
 */
@Test(dataProvider = "lessMemForMergeWithNoNames")
public void testLessMemForMergeWithNoNames(final List<Interval> intervals, final List<Interval> expected) {
    final IntervalList intervalList = new IntervalList(this.fileHeader);
    intervalList.addall(intervals);

    // Iterate the populated IntervalList rather than the raw input list: previously
    // intervalList was built but never read, so the list under test was not exercised.
    final IntervalList.IntervalMergerIterator firstNameMergerIterator =
            new IntervalList.IntervalMergerIterator(intervalList.iterator(), true, false, false);
    final Collection<Interval> firstNameMerged = CollectionUtil.makeCollection(firstNameMergerIterator);
    Assert.assertEquals(firstNameMerged, expected);

    // The merged coordinates must be the same whichever naming mode is used.
    final IntervalList.IntervalMergerIterator concatNameMergerIterator =
            new IntervalList.IntervalMergerIterator(intervalList.iterator(), true, false, true);
    final Collection<Interval> concatNameMerged = CollectionUtil.makeCollection(concatNameMergerIterator);
    Assert.assertEquals(concatNameMerged, expected);
}
}

0 comments on commit d1dc5cc

Please sign in to comment.