From 9ee96763e2ceeed27334f6b686832486fef11909 Mon Sep 17 00:00:00 2001 From: Alf Mikula Date: Tue, 8 Sep 2020 06:48:22 -0700 Subject: [PATCH 1/4] Add test for keeping oldest archive when retention target is not met --- src/borg/testsuite/helpers.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index 795ea7680..ed7ccbf1b 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -407,6 +407,38 @@ def test_prune_split(rule, num_to_keep, expected_ids): assert kept_because[item.id][0] == rule +def test_prune_split_keep_oldest(): + def subset(lst, ids): + return {i for i in lst if i.id in ids} + + archives = [ + # oldest backup, but not last in its year + MockArchive(datetime(2018, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 1), + # an interim backup + MockArchive(datetime(2018, 12, 30, 10, 0, 0, tzinfo=timezone.utc), 2), + # year end backups + MockArchive(datetime(2018, 12, 31, 10, 0, 0, tzinfo=timezone.utc), 3), + MockArchive(datetime(2019, 12, 31, 10, 0, 0, tzinfo=timezone.utc), 4), + ] + + # Keep oldest when retention target can't otherwise be met + kept_because = {} + keep = prune_split(archives, "yearly", 3, kept_because) + + assert set(keep) == subset(archives, [1, 3, 4]) + assert kept_because[1][0] == "yearly[oldest]" + assert kept_because[3][0] == "yearly" + assert kept_because[4][0] == "yearly" + + # Otherwise, prune it + kept_because = {} + keep = prune_split(archives, "yearly", 2, kept_because) + + assert set(keep) == subset(archives, [3, 4]) + assert kept_because[3][0] == "yearly" + assert kept_because[4][0] == "yearly" + + class IntervalTestCase(BaseTestCase): def test_interval(self): self.assert_equal(interval('1H'), 1) From 943088df2c3317ff7d4e5b5e1c759ddb397f7b20 Mon Sep 17 00:00:00 2001 From: Alf Mikula Date: Tue, 8 Sep 2020 06:49:35 -0700 Subject: [PATCH 2/4] Keep oldest archive when retention target is not met --- 
src/borg/helpers/misc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py index 3e62b3e4a..ea8655ab5 100644 --- a/src/borg/helpers/misc.py +++ b/src/borg/helpers/misc.py @@ -58,6 +58,10 @@ def prune_split(archives, rule, n, kept_because=None): kept_because[a.id] = (rule, len(keep)) if len(keep) == n: break + # Keep oldest archive if we didn't reach the target retention count; with no archives there is no oldest one (and the loop variable a was never bound) + if archives and len(keep) < n and a.id not in kept_because: + keep.append(a) + kept_because[a.id] = (rule+"[oldest]", len(keep)) return keep From 69a58d2be01e9e5385ef0615d24a3692bfe6de59 Mon Sep 17 00:00:00 2001 From: Alf Mikula Date: Wed, 9 Sep 2020 05:19:06 -0700 Subject: [PATCH 3/4] Fix tests which relied on dropping oldest archive when retention target not met --- src/borg/testsuite/archiver.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 339b6917b..4b41ec042 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -2029,7 +2029,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir) self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir) self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir) - output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2') + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=1') assert re.search(r'Would prune:\s+test1', output) # must keep the latest non-checkpoint archive: assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output) @@ -2041,7 +2041,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('test3.checkpoint', output) self.assert_in('test3.checkpoint.1', output) self.assert_in('test4.checkpoint', output) - self.cmd('prune', self.repository_location, 
'--keep-daily=2') + self.cmd('prune', self.repository_location, '--keep-daily=1') output = self.cmd('list', '--consider-checkpoints', self.repository_location) self.assert_not_in('test1', output) # the latest non-checkpoint archive must be still there: @@ -2063,13 +2063,13 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test1', src_dir) self.cmd('create', self.repository_location + '::test2', src_dir) - output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2') + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=1') assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output) assert re.search(r'Would prune:\s+test1', output) output = self.cmd('list', self.repository_location) self.assert_in('test1', output) self.assert_in('test2', output) - self.cmd('prune', '--save-space', self.repository_location, '--keep-daily=2') + self.cmd('prune', '--save-space', self.repository_location, '--keep-daily=1') output = self.cmd('list', self.repository_location) self.assert_not_in('test1', output) self.assert_in('test2', output) @@ -2080,7 +2080,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir) self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir) self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir) - output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-') + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=1', '--prefix=foo-') assert re.search(r'Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00', output) assert re.search(r'Would prune:\s+foo-2015-08-12-10:00', output) output = self.cmd('list', self.repository_location) @@ -2088,7 +2088,7 @@ 
class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('foo-2015-08-12-20:00', output) self.assert_in('bar-2015-08-12-10:00', output) self.assert_in('bar-2015-08-12-20:00', output) - self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-') + self.cmd('prune', self.repository_location, '--keep-daily=1', '--prefix=foo-') output = self.cmd('list', self.repository_location) self.assert_not_in('foo-2015-08-12-10:00', output) self.assert_in('foo-2015-08-12-20:00', output) @@ -2101,7 +2101,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::2015-08-12-20:00-foo', src_dir) self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir) self.cmd('create', self.repository_location + '::2015-08-12-20:00-bar', src_dir) - output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo') + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=1', '--glob-archives=2015-*-foo') assert re.search(r'Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo', output) assert re.search(r'Would prune:\s+2015-08-12-10:00-foo', output) output = self.cmd('list', self.repository_location) @@ -2109,7 +2109,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('2015-08-12-20:00-foo', output) self.assert_in('2015-08-12-10:00-bar', output) self.assert_in('2015-08-12-20:00-bar', output) - self.cmd('prune', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo') + self.cmd('prune', self.repository_location, '--keep-daily=1', '--glob-archives=2015-*-foo') output = self.cmd('list', self.repository_location) self.assert_not_in('2015-08-12-10:00-foo', output) self.assert_in('2015-08-12-20:00-foo', output) From e208ba68bdd0a5c7b752e25ef353644b1a871e6a Mon Sep 17 00:00:00 2001 From: Alf Mikula Date: Thu, 17 Sep 2020 06:52:46 -0700 Subject: [PATCH 4/4] Document retention of oldest archive, 
add to example and tests --- docs/misc/prune-example.txt | 51 ++++++++++-------- requirements.d/development.txt | 1 + src/borg/archiver.py | 6 ++- src/borg/testsuite/archiver.py | 96 ++++++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 22 deletions(-) diff --git a/docs/misc/prune-example.txt b/docs/misc/prune-example.txt index 12ffeb6fd..bc6bb209a 100644 --- a/docs/misc/prune-example.txt +++ b/docs/misc/prune-example.txt @@ -1,14 +1,18 @@ borg prune visualized ===================== -Assume it is 2016-01-01, today's backup has not yet been made and you have +Assume it is 2016-01-01, today's backup has not yet been made, you have created at least one backup on each day in 2015 except on 2015-12-19 (no -backup made on that day). +backup made on that day), and you started backing up with borg on +2015-01-01. -This is what borg prune --keep-daily 14 --keep-monthly 6 would keep. +This is what borg prune --keep-daily 14 --keep-monthly 6 --keep-yearly 1 +would keep. Backups kept by the --keep-daily rule are marked by a "d" to the right, -backups kept by the --keep-monthly rule are marked by a "m" to the right. +backups kept by the --keep-monthly rule are marked by a "m" to the right, +and backups kept by the --keep-yearly rule are marked by a "y" to the +right. Calendar view ------------- @@ -16,7 +20,7 @@ Calendar view 2015 January February March Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su - 1 2 3 4 1 1 + 1y 2 3 4 1 1 5 6 7 8 9 10 11 2 3 4 5 6 7 8 2 3 4 5 6 7 8 12 13 14 15 16 17 18 9 10 11 12 13 14 15 9 10 11 12 13 14 15 19 20 21 22 23 24 25 16 17 18 19 20 21 22 16 17 18 19 20 21 22 @@ -53,16 +57,16 @@ Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su List view --------- ---keep-daily 14 --keep-monthly 6 -------------------------------------------------- - 1. 2015-12-31 (2015-12-31 kept by daily rule) - 2. 2015-12-30 1. 2015-11-30 - 3. 2015-12-29 2. 2015-10-31 - 4. 2015-12-28 3. 2015-09-30 - 5. 2015-12-27 4. 
2015-08-31 - 6. 2015-12-26 5. 2015-07-31 - 7. 2015-12-25 6. 2015-06-30 - 8. 2015-12-24 +--keep-daily 14 --keep-monthly 6 --keep-yearly 1 +---------------------------------------------------------------- + 1. 2015-12-31 (2015-12-31 kept (2015-12-31 kept + 2. 2015-12-30 by daily rule) by daily rule) + 3. 2015-12-29 1. 2015-11-30 1. 2015-01-01 (oldest) + 4. 2015-12-28 2. 2015-10-31 + 5. 2015-12-27 3. 2015-09-30 + 6. 2015-12-26 4. 2015-08-31 + 7. 2015-12-25 5. 2015-07-31 + 8. 2015-12-24 6. 2015-06-30 9. 2015-12-23 10. 2015-12-22 11. 2015-12-21 @@ -76,18 +80,23 @@ Notes ----- 2015-12-31 is kept due to the --keep-daily 14 rule (because it is applied -first), not due to the --keep-monthly rule. +first), not due to the --keep-monthly or --keep-yearly rule. -Because of that, the --keep-monthly 6 rule keeps Nov, Oct, Sep, Aug, Jul and -Jun. December is not considered for this rule, because that backup was already -kept because of the daily rule. +The --keep-yearly 1 rule does not consider the December 31st backup because it +has already been kept due to the daily rule. There are no backups available +from previous years, so the --keep-yearly target of 1 backup is not satisfied. +Because of this, the 2015-01-01 archive (the oldest archive available) is kept. + +The --keep-monthly 6 rule keeps Nov, Oct, Sep, Aug, Jul and Jun. December is +not considered for this rule, because that backup was already kept because of +the daily rule. 2015-12-17 is kept to satisfy the --keep-daily 14 rule - because no backup was made on 2015-12-19. If a backup had been made on that day, it would not keep the one from 2015-12-17. -We did not include yearly, weekly, hourly, minutely or secondly rules to keep -this example simple. They all work in basically the same way. +We did not include weekly, hourly, minutely or secondly rules to keep this +example simple. They all work in basically the same way. The weekly rule is easy to understand roughly, but hard to understand in all details. 
If interested, read "ISO 8601:2000 standard week-based year". diff --git a/requirements.d/development.txt b/requirements.d/development.txt index a8f90b878..ec9098f23 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -10,3 +10,4 @@ pytest-cov pytest-benchmark Cython!=0.27 twine +python-dateutil diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 3c7d45e19..3cfead31a 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -4164,7 +4164,11 @@ class Archiver: rules do not count towards those of later rules. The time that each backup starts is used for pruning purposes. Dates and times are interpreted in the local timezone, and weeks go from Monday to Sunday. Specifying a - negative number of archives to keep means that there is no limit. + negative number of archives to keep means that there is no limit. As of borg + 1.2.0, borg will retain the oldest archive if any of the secondly, minutely, + hourly, daily, weekly, monthly, or yearly rules was not otherwise able to meet + its retention target. This enables the first chronological archive to continue + aging until it is replaced by a newer archive that meets the retention criteria. 
The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will keep the last N archives under the assumption that you do not create more than one diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 4b41ec042..763ba9103 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1,4 +1,5 @@ import argparse +import dateutil.tz import errno import io import json @@ -2059,6 +2060,101 @@ class ArchiverTestCase(ArchiverTestCaseBase): # the latest archive must be still there self.assert_in('test5', output) + # Given a date and time in local tz, create a UTC timestamp string suitable + # for create --timestamp command line option + def _to_utc_timestamp(self, year, month, day, hour, minute, second): + dtime = datetime(year, month, day, hour, minute, second, 0, dateutil.tz.gettz()) + return dtime.astimezone(dateutil.tz.UTC).strftime("%Y-%m-%dT%H:%M:%S") + + def _create_archive_ts(self, name, y, m, d, H=0, M=0, S=0): + loc = self.repository_location + '::' + name + self.cmd('create', '--timestamp', self._to_utc_timestamp(y, m, d, H, M, S), loc, src_dir) + + # This test must match docs/misc/prune-example.txt + def test_prune_repository_example(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + # Archives that will be kept, per the example + # Oldest archive + self._create_archive_ts('test01', 2015, 1, 1) + # 6 monthly archives + self._create_archive_ts('test02', 2015, 6, 30) + self._create_archive_ts('test03', 2015, 7, 31) + self._create_archive_ts('test04', 2015, 8, 31) + self._create_archive_ts('test05', 2015, 9, 30) + self._create_archive_ts('test06', 2015, 10, 31) + self._create_archive_ts('test07', 2015, 11, 30) + # 14 daily archives + self._create_archive_ts('test08', 2015, 12, 17) + self._create_archive_ts('test09', 2015, 12, 18) + self._create_archive_ts('test10', 2015, 12, 20) + self._create_archive_ts('test11', 2015, 12, 21) + self._create_archive_ts('test12', 
2015, 12, 22) + self._create_archive_ts('test13', 2015, 12, 23) + self._create_archive_ts('test14', 2015, 12, 24) + self._create_archive_ts('test15', 2015, 12, 25) + self._create_archive_ts('test16', 2015, 12, 26) + self._create_archive_ts('test17', 2015, 12, 27) + self._create_archive_ts('test18', 2015, 12, 28) + self._create_archive_ts('test19', 2015, 12, 29) + self._create_archive_ts('test20', 2015, 12, 30) + self._create_archive_ts('test21', 2015, 12, 31) + # Additional archives that would be pruned + # The second backup of the year + self._create_archive_ts('test22', 2015, 1, 2) + # The next older monthly backup + self._create_archive_ts('test23', 2015, 5, 31) + # The next older daily backup + self._create_archive_ts('test24', 2015, 12, 16) + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=14', '--keep-monthly=6', '--keep-yearly=1') + # Prune second backup of the year + assert re.search(r'Would prune:\s+test22', output) + # Prune next older monthly and daily backups + assert re.search(r'Would prune:\s+test23', output) + assert re.search(r'Would prune:\s+test24', output) + # Must keep the other 21 backups + # Yearly is kept as oldest archive + assert re.search(r'Keeping archive \(rule: yearly\[oldest\] #1\):\s+test01', output) + for i in range(1, 7): + assert re.search(r'Keeping archive \(rule: monthly #' + str(i) + r'\):\s+test' + ("%02d" % (8-i)), output) + for i in range(1, 15): + assert re.search(r'Keeping archive \(rule: daily #' + str(i) + r'\):\s+test' + ("%02d" % (22-i)), output) + output = self.cmd('list', self.repository_location) + # Nothing pruned after dry run + for i in range(1, 25): + self.assert_in('test%02d' % i, output) + self.cmd('prune', self.repository_location, '--keep-daily=14', '--keep-monthly=6', '--keep-yearly=1') + output = self.cmd('list', self.repository_location) + # All matching backups plus oldest kept + for i in range(1, 22): + self.assert_in('test%02d' % i, output) + # Other backups 
have been pruned + for i in range(22, 25): + self.assert_not_in('test%02d' % i, output) + + # With an initial and daily backup, prune daily until oldest is replaced by a monthly backup + def test_prune_retain_and_expire_oldest(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + # Initial backup + self._create_archive_ts('original_archive', 2020, 9, 1, 11, 15) + # Archive and prune daily for 30 days + for i in range(1, 31): + self._create_archive_ts('september%02d' % i, 2020, 9, i, 12) + self.cmd('prune', self.repository_location, '--keep-daily=7', '--keep-monthly=1') + # Archive and prune 6 days into the next month + for i in range(1, 7): + self._create_archive_ts('october%02d' % i, 2020, 10, i, 12) + self.cmd('prune', self.repository_location, '--keep-daily=7', '--keep-monthly=1') + # Oldest backup is still retained + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=7', '--keep-monthly=1') + assert re.search(r'Keeping archive \(rule: monthly\[oldest\] #1' + r'\):\s+original_archive', output) + # Archive one more day and prune. + self._create_archive_ts('october07', 2020, 10, 7, 12) + self.cmd('prune', self.repository_location, '--keep-daily=7', '--keep-monthly=1') + # Last day of previous month is retained as monthly, and oldest is expired. + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=7', '--keep-monthly=1') + assert re.search(r'Keeping archive \(rule: monthly #1\):\s+september30', output) + self.assert_not_in('original_archive', output) + def test_prune_repository_save_space(self): self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('create', self.repository_location + '::test1', src_dir)