Skip to content

Commit

Permalink
snappr: Rework pruning logic
Browse files Browse the repository at this point in the history
Previously, while the result of a prune was technically correct, pruning
after each snapshot with multiple intervals per unit could remove
snapshots which did not yet meet the conditions for the longer interval,
but would later be needed for it (e.g., the 5th secondly:1h snapshot
with the policy `4@secondly:1h 4@secondly:2h` was removed, even though it
would later be used for the 3rd secondly:2h snapshot).

I now calculate snapshot periods starting from zero time, taking the
first snapshot in each period. Not shifting the reference time as
snapshots are added prevents the aforementioned issue.

I have added a test to ensure this does not happen. Running the old
snappr.go with the new tests results in an error: snappr_test.go:362:
subset 2480->2992: prune consistency: Prune([:2992])=15 !=
Prune(Prune([:2480]) + [2480:2992])=14.
  • Loading branch information
pgaskin committed Nov 16, 2023
1 parent de8a5be commit 649f523
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 123 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
Works with any tool or script which can output a list with dates somewhere in it.

- **Approximate snapshot selection.** \
Snapshot periods are not fixed to specific dates. While the latest one in each period (e.g., the last day of a month) will be used if possible, ones from other days will still be retained if required.
Snapshot periods are not fixed to specific dates. The first matching snapshot for each period is kept (note that this means you'll usually want to keep at least the last snapshot in addition to whatever other rules you have).

- **Robust retention policies.** \
Multiple intervals are supported for each period (last, secondly, daily, monthly, yearly). You can have one snapshot every month for 6 months, while also having one every two for 12.
Expand All @@ -27,9 +27,6 @@
> [!WARNING]
> This tool is still in development. While most functionality has been tested and I am using this as part of my own backup scripts, it may still have rough edges, and the command-line interface and API are subject to change. Full automated tests have not been implemented yet.
> [!NOTE]
> **Known Issue:** While the result of a prune is technically correct, pruning after each snapshot with multiple intervals per unit may remove snapshots which do not yet meet the conditions for the longer interval, but would later be needed for it (e.g., the 5th secondly:1h snapshot with the policy `4@secondly:1h 4@secondly:2h`, even though it would later be used for the 3rd secondly:2h snapshot). I am still considering the advantages and disadvantages of the possible ways to fix this. For now, either run prune at an interval larger than the longest interval for a unit with multiple intervals, or don't use multiple intervals for a single unit.
#### CLI Example

```bash
Expand Down
152 changes: 61 additions & 91 deletions snappr.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,36 +54,6 @@ func (u Unit) Compare(other Unit) int {
return cmp.Compare(u, other)
}

// TimeEquals reports whether a and b fall within the same instance of the
// unit u (for example, the same calendar day for Daily). It returns false if
// u is not a valid unit.
func (u Unit) TimeEquals(a, b time.Time) bool {
	if !u.IsValid() {
		return false
	}

	// A non-positive Truncate only strips the monotonic clock reading, so the
	// comparisons below see wall-clock time alone.
	a, b = a.Truncate(-1), b.Truncate(-1)

	// Calendar fields are read from each time as-is (no location conversion
	// is performed here).
	aYear, aMonth, aDay := a.Date()
	bYear, bMonth, bDay := b.Date()

	switch u {
	case Last:
		// Last has no calendar granularity; only the same instant matches.
		return a.Equal(b)
	case Secondly:
		return a.Unix() == b.Unix()
	case Daily:
		return aYear == bYear && aMonth == bMonth && aDay == bDay
	case Monthly:
		return aYear == bYear && aMonth == bMonth
	case Yearly:
		return aYear == bYear
	default:
		// Reached only if IsValid accepts a unit that is not handled above.
		panic("wtf")
	}
}

// Period is a specific time interval for snapshot retention.
type Period struct {
Unit Unit
Expand Down Expand Up @@ -137,28 +107,6 @@ func (p Period) Compare(other Period) int {
return cmp.Compare(p.Interval, other.Interval)
}

// PrevTime returns t moved back by one interval of the period. Only the
// offset is applied; the result is not aligned to the start of an interval.
// The zero time is returned if the period's unit is not valid.
func (p Period) PrevTime(t time.Time) time.Time {
	unit, n := p.Unit, p.Interval
	if !unit.IsValid() {
		var zero time.Time
		return zero
	}

	// A non-positive Truncate only strips the monotonic clock reading.
	t = t.Truncate(-1)

	switch unit {
	case Last:
		// The interval is not consulted for Last: step back by one
		// nanosecond, the smallest representable duration.
		return t.Add(-1)
	case Secondly:
		return t.Add(time.Duration(n) * -time.Second)
	case Daily:
		return t.AddDate(0, 0, -n)
	case Monthly:
		return t.AddDate(0, -n, 0)
	case Yearly:
		return t.AddDate(-n, 0, 0)
	default:
		// Reached only if IsValid accepts a unit that is not handled above.
		panic("wtf")
	}
}

// Policy defines a retention policy for snapshots.
//
// All periods are valid and normalized.
Expand Down Expand Up @@ -379,6 +327,11 @@ func (p Policy) MarshalText() ([]byte, error) {
// guarantees provided by Prune.
func Prune(snapshots []time.Time, policy Policy) (keep [][]Period, need Policy) {
need = policy.Clone()
keep = make([][]Period, len(snapshots))

if len(snapshots) == 0 {
return
}

// sort the snapshots descending
sorted := make([]int, len(snapshots))
Expand All @@ -388,52 +341,69 @@ func Prune(snapshots []time.Time, policy Policy) (keep [][]Period, need Policy)
slices.SortFunc(sorted, func(a, b int) int {
return snapshots[a].Compare(snapshots[b])
})
slices.Reverse(sorted)

// figure out which ones to keep
keep = make([][]Period, len(snapshots))
lastPeriod := map[Period]time.Time{}
lastPeriodIdx := map[Period]int{}
lastUnit := [numUnits]time.Time{}
for _, idx := range sorted {
at := snapshots[idx].Truncate(-1) // remove monotonic component

need.Each(func(period Period, count int) {
if count == 0 {
return
policy.Each(func(period Period, count int) {
var (
match = make([]bool, len(snapshots))
last int64 // period index
prev bool
)
// start from the beginning, marking the first one in each period
for i := range snapshots {
var current int64
switch t := snapshots[sorted[i]].Truncate(-1); period.Unit {
case Last:
match[i] = true
continue
case Secondly:
current = t.Unix()
case Daily:
n, x := t.Year(), 0

x = n / 400
current += int64(x * (365*400 + 97)) // days per 400 years
n -= x * 400

x = n / 100
current += int64(x * (365*100 + 24)) // days per 100 years
n -= x * 100

x = n / 4
current += int64(x * (365*4 + 1)) // days per 4 years
n -= x * 4

current += int64(x) + int64(t.YearDay())
case Monthly:
year, month, _ := t.Date()
current = (int64(year)*12 + int64(month))
case Yearly:
current = int64(t.Year())
default:
panic("wtf")
}
current /= int64(period.Interval)

// we don't care about times for the Last unit
if period.Unit == Last {
keep[idx] = append(keep[idx], period)
if count > 0 {
need.count[period]--
}
return
if !prev || current != last {
match[i] = true
last = current
prev = true
}

// check if we need this snapshot for the specified policy
if last := lastPeriod[period]; !last.IsZero() { // if we already have the first snapshot
if want := period.PrevTime(last); want.Before(at) { // and on or ahead of schedule
if !period.Unit.TimeEquals(want, at) { // and not scheduled for one in this period+unit
return // then skip this snapshot
}
}
}
// preserve from the end and stay within the count
for i := range match {
i = len(match) - 1 - i
if count == 0 {
break
}

// see if can't reuse the existing snapshot for the unit-truncated time (i.e., disregarding the interval)
if have := lastUnit[period.Unit]; have.IsZero() || !period.Unit.TimeEquals(have, at) { // if another interval already caused a retention for this unit
lastPeriod[period] = at
lastPeriodIdx[period] = idx
if !match[i] {
continue
}

// keep the snapshot
keep[lastPeriodIdx[period]] = append(keep[lastPeriodIdx[period]], period)
if count > 0 {
need.count[period]--
count--
}
})
}

keep[sorted[i]] = append(keep[sorted[i]], period)
}
need.count[period] = count
})
return
}
90 changes: 62 additions & 28 deletions snappr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
var (
prevNeed Policy
prevSubset = -1
lastKept []time.Time
)
for i, subset := 0, 0; subset < len(snapshots); i++ {
allSnapshots := snapshots
Expand Down Expand Up @@ -244,6 +245,33 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
*/
// TODO

/**
* Incrementally pruning snapshots will result in the same number of
* snapshots as pruning them all at once.
*/
if subset != 0 {
lastKept = append(lastKept, snapshots[prevSubset:]...)
pKeep, _ := Prune(lastKept, policy)

var incN, absN int
lastKept = lastKept[:0]
for _, reason := range pKeep {
if len(reason) != 0 {
incN++
}
}
for at, reason := range keep {
if len(reason) != 0 {
lastKept = append(lastKept, snapshots[at])
absN++
}
}

if incN != absN {
return fmt.Errorf("subset %d->%d: prune consistency: Prune([:%d])=%d != Prune(Prune([:%d]) + [%d:%d])=%d", prevSubset, subset, subset, absN, prevSubset, prevSubset, subset, incN)
}
}

/**
* Add an increasing number of snapshots at a time (if the first few
* work fine wrt the prune consistency checks, it's unlikely that adding
Expand Down Expand Up @@ -288,7 +316,7 @@ func TestPrune(t *testing.T) {
policy.MustSet(Secondly, int(time.Hour/time.Second), 6)
policy.MustSet(Last, 1, 3)

return times, policy, "bf49acdf6f509786338a6646f7e17a4a4d7bdc987329c0b368f9c383dc56b0e3"
return times, policy, "a48749a9d6e92ebbc09a5fb3b46a304879fdb1aeebe28264c0885cea0048f8d1"
},
// TODO: more cases
} {
Expand Down Expand Up @@ -377,34 +405,40 @@ func ExamplePrune() {

// Output:
// last (3), 1h time (6), 1 day (7), 2 month (6), 6 month (4), 1 year (3), 2 year (10), 5 year (inf)
// Fri Dec 31 23:55:29 1999 | 2 year
// Mon Dec 31 23:34:57 2001 | 2 year
// Wed Dec 31 23:53:53 2003 | 2 year, 5 year
// Sat Dec 31 23:53:06 2005 | 2 year
// Mon Dec 31 23:52:17 2007 | 2 year
// Wed Dec 31 23:41:54 2008 | 5 year
// Thu Dec 31 23:51:30 2009 | 2 year
// Sat Dec 31 23:40:26 2011 | 1 year, 2 year
// Thu May 31 23:33:05 2012 | 6 month
// Wed Oct 31 23:35:45 2012 | 6 month
// Mon Dec 31 23:10:18 2012 | 2 month, 1 year
// Thu Jan 31 23:53:21 2013 | 2 month
// Sun Mar 31 23:17:06 2013 | 2 month, 6 month
// Fri May 31 23:32:10 2013 | 2 month
// Wed Jul 31 23:57:29 2013 | 2 month
// Mon Sep 2 23:41:05 2013 | 1 day
// Tue Sep 3 23:51:06 2013 | 1 day
// Wed Sep 4 23:51:53 2013 | 1 day
// Thu Sep 5 23:31:54 2013 | 1 day
// Fri Sep 6 23:52:26 2013 | 1 day
// Sat Sep 7 23:12:42 2013 | 1 day
// Sun Sep 8 16:47:35 2013 | 1h time
// Fri Dec 31 23:55:29 1999 | 2 year, 5 year
// Sat Jan 1 00:36:00 2000 | 2 year, 5 year
// Tue Jan 1 00:45:28 2002 | 2 year
// Thu Jan 1 00:04:24 2004 | 2 year
// Sat Jan 1 00:04:16 2005 | 5 year
// Sun Jan 1 00:43:52 2006 | 2 year
// Tue Jan 1 00:02:48 2008 | 2 year
// Fri Jan 1 00:42:16 2010 | 2 year, 5 year
// Sat Jan 1 00:11:21 2011 | 1 year
// Thu Dec 1 00:18:09 2011 | 6 month
// Sun Jan 1 00:01:12 2012 | 1 year, 2 year
// Fri Jun 1 00:43:36 2012 | 6 month
// Mon Oct 1 00:13:28 2012 | 2 month
// Sat Dec 1 00:38:47 2012 | 2 month, 6 month
// Tue Jan 1 00:01:04 2013 | 1 year
// Fri Feb 1 00:33:52 2013 | 2 month
// Mon Apr 1 00:27:37 2013 | 2 month
// Sat Jun 1 00:12:41 2013 | 2 month, 6 month
// Thu Aug 1 00:38:00 2013 | 2 month
// Mon Sep 2 00:01:04 2013 | 1 day
// Tue Sep 3 00:31:51 2013 | 1 day
// Wed Sep 4 00:01:37 2013 | 1 day
// Thu Sep 5 00:32:24 2013 | 1 day
// Fri Sep 6 00:12:25 2013 | 1 day
// Sat Sep 7 00:43:12 2013 | 1 day
// Sun Sep 8 00:03:28 2013 | 1 day
// Sun Sep 8 18:18:52 2013 | 1h time
// Sun Sep 8 19:29:23 2013 | 1h time
// Sun Sep 8 20:40:55 2013 | 1h time
// Sun Sep 8 22:12:12 2013 | last, 1h time
// Sun Sep 8 23:22:43 2013 | last
// Sun Sep 8 23:33:14 2013 | last, 1h time, 1 day, 2 month, 6 month, 1 year, 2 year, 5 year
// Sun Sep 8 19:09:38 2013 | 1h time
// Sun Sep 8 20:20:09 2013 | 1h time
// Sun Sep 8 21:51:26 2013 | 1h time
// Sun Sep 8 22:01:57 2013 | 1h time
// Sun Sep 8 22:12:12 2013 | last
// Sun Sep 8 23:22:43 2013 | last, 1h time
// Sun Sep 8 23:33:14 2013 | last
// last (0), 1h time (0), 1 day (0), 2 month (0), 6 month (0), 1 year (0), 2 year (2), 5 year (inf)
}

Expand Down

0 comments on commit 649f523

Please sign in to comment.