From de54228809046d3942b5c20e2d8fbea54653aba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 21:08:47 -0400 Subject: [PATCH 01/63] first stab at an attic-borg converter for now, just in the test suite, but will be migrated to a separate command --- borg/testsuite/convert.py | 108 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 borg/testsuite/convert.py diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py new file mode 100644 index 000000000..e201581c3 --- /dev/null +++ b/borg/testsuite/convert.py @@ -0,0 +1,108 @@ +import binascii +import os +import pytest +import shutil +import tempfile + +import attic.repository + +from ..helpers import IntegrityError +from ..repository import Repository, MAGIC +from . import BaseTestCase + +class NotImplementedException(Exception): + pass + +class ConversionTestCase(BaseTestCase): + + def open(self, path, repo_type = Repository, create=False): + return repo_type(os.path.join(path, 'repository'), create = create) + + def setUp(self): + self.tmppath = tempfile.mkdtemp() + self.attic_repo = self.open(self.tmppath, + repo_type = attic.repository.Repository, + create = True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ + for x in range(100): + self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.attic_repo.close() + + def test_convert(self): + self.repository = self.open(self.tmppath) + # check should fail because of magic number + assert not self.repository.check() # can't check raises() because check() handles the error + self.repository.close() + self.convert() + self.repository = self.open(self.tmppath) + assert self.repository.check() # can't check raises() because check() handles the error + self.repository.close() + + def convert(self): + '''convert an attic repository to a borg repository + + those are the files that need to be converted here, from most + important to least 
important: segments, key files, and various + caches, the latter being optional, as they will be rebuilt if + missing.''' + self.convert_segments() + with pytest.raises(NotImplementedException): + self.convert_keyfiles() + with pytest.raises(NotImplementedException): + self.convert_cache() + + def convert_segments(self): + '''convert repository segments from attic to borg + + replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in + `$ATTIC_REPO/data/**`. + + luckily the segment length didn't change so we can just + replace the 8 first bytes of all regular files in there. + + `Repository.segment_iterator()` could be used here.''' + self.repository = self.open(self.tmppath) + segs = [ filename for i, filename in self.repository.io.segment_iterator() ] + self.repository.close() + for filename in segs: + print("converting segment %s..." % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) + + def convert_keyfiles(self): + '''convert key files from attic to borg + + replacement pattern is `s/ATTIC KEY/BORG_KEY/` in + `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or + `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or + `$HOME/.borg/keys`. + + the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + finds the keys with the right identifier for the repo, no need + to decrypt to convert. will need to rewrite the whole key file + because magic number length changed.''' + raise NotImplementedException('not implemented') + + def convert_cache(self): + '''convert caches from attic to borg + + those are all hash indexes, so we need to + `s/ATTICIDX/BORG_IDX/` in a few locations: + + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` + is the `Repository.get_index_transaction_id()`), which we + should probably update, with a lock, see + `Repository.open()`, which i'm not sure we should use + because it may write data on `Repository.close()`... 
+ + * the `files` and `chunks` cache (in + `$HOME/.cache/attic//`), which we could just drop, + but if we'd want to convert, we could open it with the + `Cache.open()`, edit in place and then `Cache.close()` to + make sure we have locking right + ''' + raise NotImplementedException('not implemented') + + def tearDown(self): + shutil.rmtree(self.tmppath) From 9ab1e1961e8acf29b17b1acedc62b4f717b1fd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:23:11 -0400 Subject: [PATCH 02/63] keyfile conversion code --- borg/testsuite/convert.py | 84 ++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e201581c3..ba3af8ee6 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -5,16 +5,27 @@ import shutil import tempfile import attic.repository +import attic.key +import attic.helpers -from ..helpers import IntegrityError +from ..helpers import IntegrityError, get_keys_dir from ..repository import Repository, MAGIC +from ..key import KeyfileKey, KeyfileNotFoundError from . 
import BaseTestCase class NotImplementedException(Exception): pass +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + class ConversionTestCase(BaseTestCase): + class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) + def open(self, path, repo_type = Repository, create=False): return repo_type(os.path.join(path, 'repository'), create = create) @@ -26,6 +37,10 @@ class ConversionTestCase(BaseTestCase): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.keysdir = self.MockArgs(self.tmppath) + os.environ['ATTIC_KEYS_DIR'] = self.tmppath + os.environ['ATTIC_PASSPHRASE'] = 'test' + self.key = attic.key.KeyfileKey.create(self.attic_repo, self.keysdir) self.attic_repo.close() def test_convert(self): @@ -33,9 +48,15 @@ class ConversionTestCase(BaseTestCase): # check should fail because of magic number assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() + os.environ['BORG_KEYS_DIR'] = self.tmppath self.convert() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(self.key.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) self.repository = self.open(self.tmppath) - assert self.repository.check() # can't check raises() because check() handles the error + assert self.repository.check() self.repository.close() def convert(self): @@ -45,32 +66,52 @@ class ConversionTestCase(BaseTestCase): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' - self.convert_segments() - with pytest.raises(NotImplementedException): - self.convert_keyfiles() + self.repository = self.open(self.tmppath) + segments = [ filename for i, filename in 
self.repository.io.segment_iterator() ] + try: + keyfile = self.find_attic_keyfile() + except KeyfileNotFoundError: + print("no key file found for repository, not converting") + else: + self.convert_keyfiles(keyfile) + self.repository.close() + self.convert_segments(segments) with pytest.raises(NotImplementedException): self.convert_cache() - def convert_segments(self): + def convert_segments(self, segments): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there. - - `Repository.segment_iterator()` could be used here.''' - self.repository = self.open(self.tmppath) - segs = [ filename for i, filename in self.repository.io.segment_iterator() ] - self.repository.close() - for filename in segs: + replace the 8 first bytes of all regular files in there.''' + for filename in segments: print("converting segment %s..." % filename) with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) - def convert_keyfiles(self): + def find_attic_keyfile(self): + '''find the attic keyfiles + + this is expected to look into $HOME/.attic/keys or + $ATTIC_KEYS_DIR for key files matching the given Borg + repository. + + it is expected to raise an exception (KeyfileNotFoundError) if + no key is found. whether that exception is from Borg or Attic + is unclear. + + this is split in a separate function in case we want to + reimplement the attic code here. + ''' + self.repository._location = attic.helpers.Location(self.tmppath) + return attic.key.KeyfileKey().find_key_file(self.repository) + + def convert_keyfiles(self, keyfile): + '''convert key files from attic to borg replacement pattern is `s/ATTIC KEY/BORG_KEY/` in @@ -82,7 +123,20 @@ class ConversionTestCase(BaseTestCase): finds the keys with the right identifier for the repo, no need to decrypt to convert. 
will need to rewrite the whole key file because magic number length changed.''' - raise NotImplementedException('not implemented') + print("converting keyfile %s" % keyfile) + with open(keyfile, 'r') as f: + data = f.read() + data = data.replace(AtticKeyfileKey.FILE_ID, + KeyfileKey.FILE_ID, + 1) + keyfile = os.path.join(get_keys_dir(), + os.path.basename(keyfile)) + print("writing borg keyfile to %s" % keyfile) + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self): '''convert caches from attic to borg From e88a994c8a2bd269d23a1fb4307dd2d9923c5668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:40:46 -0400 Subject: [PATCH 03/63] reshuffle and document --- borg/testsuite/convert.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index ba3af8ee6..52564134d 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -37,10 +37,17 @@ class ConversionTestCase(BaseTestCase): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - self.keysdir = self.MockArgs(self.tmppath) + + # we use the repo dir for the created keyfile, because we do + # not want to clutter existing keyfiles os.environ['ATTIC_KEYS_DIR'] = self.tmppath + + # we use the same directory for the converted files, which + # will clutter the previously created one, which we don't care + # about anyways. in real runs, the original key will be retained. 
+ os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, self.keysdir) + self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) self.attic_repo.close() def test_convert(self): @@ -48,7 +55,6 @@ class ConversionTestCase(BaseTestCase): # check should fail because of magic number assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() - os.environ['BORG_KEYS_DIR'] = self.tmppath self.convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), @@ -96,6 +102,9 @@ class ConversionTestCase(BaseTestCase): def find_attic_keyfile(self): '''find the attic keyfiles + the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + finds the keys with the right identifier for the repo + this is expected to look into $HOME/.attic/keys or $ATTIC_KEYS_DIR for key files matching the given Borg repository. @@ -119,10 +128,10 @@ class ConversionTestCase(BaseTestCase): `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or `$HOME/.borg/keys`. - the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo, no need - to decrypt to convert. will need to rewrite the whole key file - because magic number length changed.''' + no need to decrypt to convert. 
we need to rewrite the whole + key file because magic number length changed, but that's not a + problem because the keyfiles are small (compared to, say, + all the segments).''' print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() From 2d1988179e5149bb7bc29d589a5fa6887b00e76b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:41:38 -0400 Subject: [PATCH 04/63] some debugging code --- borg/testsuite/convert.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 52564134d..c95ffa793 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -53,6 +53,7 @@ class ConversionTestCase(BaseTestCase): def test_convert(self): self.repository = self.open(self.tmppath) # check should fail because of magic number + print("this will show an error, it is expected") assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() self.convert() @@ -72,7 +73,9 @@ class ConversionTestCase(BaseTestCase): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' + print("opening attic repository with borg") self.repository = self.open(self.tmppath) + print("reading segments from attic repository using borg") segments = [ filename for i, filename in self.repository.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() @@ -94,7 +97,7 @@ class ConversionTestCase(BaseTestCase): luckily the segment length didn't change so we can just replace the 8 first bytes of all regular files in there.''' for filename in segments: - print("converting segment %s..." 
% filename) + print("converting segment %s in place" % filename) with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) From c7af4c7f1d1f8d5380bce60c27d743f11e0dc81d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:43:08 -0400 Subject: [PATCH 05/63] more debug --- borg/testsuite/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index c95ffa793..5ef88893b 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -80,7 +80,7 @@ class ConversionTestCase(BaseTestCase): try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: - print("no key file found for repository, not converting") + print("no key file found for repository") else: self.convert_keyfiles(keyfile) self.repository.close() From 312c3cf738318ec0dba0383a23159f17fa0aa1e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:53:58 -0400 Subject: [PATCH 06/63] rewrite converter to avoid using attic code the unit tests themselves still use attic to generate an attic repository for testing, but the converter code should now be standalone --- borg/testsuite/convert.py | 42 +++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 5ef88893b..44b0a3f39 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,4 +1,4 @@ -import binascii +from binascii import hexlify import os import pytest import shutil @@ -20,6 +20,37 @@ class AtticKeyfileKey(KeyfileKey): '''backwards compatible Attick key file parser''' FILE_ID = 'ATTIC KEY' + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, 
repository): + '''copy of attic's `find_key_file`_ + + this has two small modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) + class ConversionTestCase(BaseTestCase): class MockArgs: @@ -116,11 +147,10 @@ class ConversionTestCase(BaseTestCase): no key is found. whether that exception is from Borg or Attic is unclear. - this is split in a separate function in case we want to - reimplement the attic code here. - ''' - self.repository._location = attic.helpers.Location(self.tmppath) - return attic.key.KeyfileKey().find_key_file(self.repository) + this is split in a separate function in case we want to use + the attic code here directly, instead of our local + implementation.''' + return AtticKeyfileKey.find_key_file(self.repository) def convert_keyfiles(self, keyfile): From aa25a217a46b678b14ddbd08d3ec66e2cc11b349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:01:03 -0400 Subject: [PATCH 07/63] move conversion code to a separate class for clarity --- borg/testsuite/convert.py | 86 +++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 44b0a3f39..f4b8bd3db 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -16,41 +16,6 @@ from . 
import BaseTestCase class NotImplementedException(Exception): pass -class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' - FILE_ID = 'ATTIC KEY' - - # verbatim copy from attic - @staticmethod - def get_keys_dir(): - """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.attic', 'keys')) - - @classmethod - def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ - - this has two small modifications: - - 1. it uses the above `get_keys_dir`_ instead of the global one, - assumed to be borg's - - 2. it uses `repository.path`_ instead of - `repository._location.canonical_path`_ because we can't - assume the repository has been opened by the archiver yet - ''' - get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: - return filename - raise KeyfileNotFoundError(repository.path, get_keys_dir()) - class ConversionTestCase(BaseTestCase): class MockArgs: @@ -81,13 +46,17 @@ class ConversionTestCase(BaseTestCase): self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) self.attic_repo.close() + def tearDown(self): + shutil.rmtree(self.tmppath) + def test_convert(self): self.repository = self.open(self.tmppath) # check should fail because of magic number print("this will show an error, it is expected") assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() - self.convert() + print("opening attic repository with borg and converting") + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), 
os.path.basename(self.key.path)) @@ -97,6 +66,7 @@ class ConversionTestCase(BaseTestCase): assert self.repository.check() self.repository.close() +class AtticRepositoryConverter(Repository): def convert(self): '''convert an attic repository to a borg repository @@ -104,17 +74,15 @@ class ConversionTestCase(BaseTestCase): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' - print("opening attic repository with borg") - self.repository = self.open(self.tmppath) print("reading segments from attic repository using borg") - segments = [ filename for i, filename in self.repository.io.segment_iterator() ] + segments = [ filename for i, filename in self.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: print("no key file found for repository") else: self.convert_keyfiles(keyfile) - self.repository.close() + self.close() self.convert_segments(segments) with pytest.raises(NotImplementedException): self.convert_cache() @@ -150,7 +118,7 @@ class ConversionTestCase(BaseTestCase): this is split in a separate function in case we want to use the attic code here directly, instead of our local implementation.''' - return AtticKeyfileKey.find_key_file(self.repository) + return AtticKeyfileKey.find_key_file(self) def convert_keyfiles(self, keyfile): @@ -200,5 +168,37 @@ class ConversionTestCase(BaseTestCase): ''' raise NotImplementedException('not implemented') - def tearDown(self): - shutil.rmtree(self.tmppath) +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, repository): + '''copy of attic's `find_key_file`_ + + this has two small 
modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) From 5a1680397c571ad2f42e731d4835b7f2f356aa55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:02:21 -0400 Subject: [PATCH 08/63] remove needless use of self --- borg/testsuite/convert.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index f4b8bd3db..9d5f95142 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -87,7 +87,8 @@ class AtticRepositoryConverter(Repository): with pytest.raises(NotImplementedException): self.convert_cache() - def convert_segments(self, segments): + @staticmethod + def convert_segments(segments): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -120,7 +121,8 @@ class AtticRepositoryConverter(Repository): implementation.''' return AtticKeyfileKey.find_key_file(self) - def convert_keyfiles(self, keyfile): + @staticmethod + def convert_keyfiles(keyfile): '''convert key files from attic to borg From c30df4e033834c4d96be67fe4bcedb75014dc115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:18:03 -0400 Subject: [PATCH 09/63] move converter code out of test suite --- borg/converter.py | 147 +++++++++++++++++++++++++++++++++++++ 
borg/testsuite/convert.py | 151 ++------------------------------------ 2 files changed, 152 insertions(+), 146 deletions(-) create mode 100644 borg/converter.py diff --git a/borg/converter.py b/borg/converter.py new file mode 100644 index 000000000..b558af883 --- /dev/null +++ b/borg/converter.py @@ -0,0 +1,147 @@ +from binascii import hexlify +import os + +from .helpers import IntegrityError, get_keys_dir +from .repository import Repository, MAGIC +from .key import KeyfileKey, KeyfileNotFoundError + +class NotImplementedException(Exception): + pass + +class AtticRepositoryConverter(Repository): + def convert(self): + '''convert an attic repository to a borg repository + + those are the files that need to be converted here, from most + important to least important: segments, key files, and various + caches, the latter being optional, as they will be rebuilt if + missing.''' + print("reading segments from attic repository using borg") + segments = [ filename for i, filename in self.io.segment_iterator() ] + try: + keyfile = self.find_attic_keyfile() + except KeyfileNotFoundError: + print("no key file found for repository") + else: + self.convert_keyfiles(keyfile) + self.close() + self.convert_segments(segments) + self.convert_cache() + + @staticmethod + def convert_segments(segments): + '''convert repository segments from attic to borg + + replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in + `$ATTIC_REPO/data/**`. + + luckily the segment length didn't change so we can just + replace the 8 first bytes of all regular files in there.''' + for filename in segments: + print("converting segment %s in place" % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) + + def find_attic_keyfile(self): + '''find the attic keyfiles + + the keyfiles are loaded by `KeyfileKey.find_key_file()`. 
that + finds the keys with the right identifier for the repo + + this is expected to look into $HOME/.attic/keys or + $ATTIC_KEYS_DIR for key files matching the given Borg + repository. + + it is expected to raise an exception (KeyfileNotFoundError) if + no key is found. whether that exception is from Borg or Attic + is unclear. + + this is split in a separate function in case we want to use + the attic code here directly, instead of our local + implementation.''' + return AtticKeyfileKey.find_key_file(self) + + @staticmethod + def convert_keyfiles(keyfile): + + '''convert key files from attic to borg + + replacement pattern is `s/ATTIC KEY/BORG_KEY/` in + `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or + `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or + `$HOME/.borg/keys`. + + no need to decrypt to convert. we need to rewrite the whole + key file because magic number length changed, but that's not a + problem because the keyfiles are small (compared to, say, + all the segments).''' + print("converting keyfile %s" % keyfile) + with open(keyfile, 'r') as f: + data = f.read() + data = data.replace(AtticKeyfileKey.FILE_ID, + KeyfileKey.FILE_ID, + 1) + keyfile = os.path.join(get_keys_dir(), + os.path.basename(keyfile)) + print("writing borg keyfile to %s" % keyfile) + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) + + def convert_cache(self): + '''convert caches from attic to borg + + those are all hash indexes, so we need to + `s/ATTICIDX/BORG_IDX/` in a few locations: + + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` + is the `Repository.get_index_transaction_id()`), which we + should probably update, with a lock, see + `Repository.open()`, which i'm not sure we should use + because it may write data on `Repository.close()`... 
+ + * the `files` and `chunks` cache (in + `$HOME/.cache/attic//`), which we could just drop, + but if we'd want to convert, we could open it with the + `Cache.open()`, edit in place and then `Cache.close()` to + make sure we have locking right + ''' + raise NotImplementedException('not implemented') + +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, repository): + '''copy of attic's `find_key_file`_ + + this has two small modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 9d5f95142..74196063e 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,4 +1,3 @@ -from binascii import hexlify import os import pytest import shutil @@ -8,14 +7,12 @@ import attic.repository import attic.key import attic.helpers -from ..helpers import IntegrityError, get_keys_dir +from ..converter import AtticRepositoryConverter, NotImplementedException +from ..helpers import get_keys_dir +from ..key import KeyfileKey from ..repository import Repository, MAGIC -from 
..key import KeyfileKey, KeyfileNotFoundError from . import BaseTestCase -class NotImplementedException(Exception): - pass - class ConversionTestCase(BaseTestCase): class MockArgs: @@ -56,7 +53,8 @@ class ConversionTestCase(BaseTestCase): assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() print("opening attic repository with borg and converting") - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() + with pytest.raises(NotImplementedException): + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) @@ -65,142 +63,3 @@ class ConversionTestCase(BaseTestCase): self.repository = self.open(self.tmppath) assert self.repository.check() self.repository.close() - -class AtticRepositoryConverter(Repository): - def convert(self): - '''convert an attic repository to a borg repository - - those are the files that need to be converted here, from most - important to least important: segments, key files, and various - caches, the latter being optional, as they will be rebuilt if - missing.''' - print("reading segments from attic repository using borg") - segments = [ filename for i, filename in self.io.segment_iterator() ] - try: - keyfile = self.find_attic_keyfile() - except KeyfileNotFoundError: - print("no key file found for repository") - else: - self.convert_keyfiles(keyfile) - self.close() - self.convert_segments(segments) - with pytest.raises(NotImplementedException): - self.convert_cache() - - @staticmethod - def convert_segments(segments): - '''convert repository segments from attic to borg - - replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in - `$ATTIC_REPO/data/**`. 
- - luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there.''' - for filename in segments: - print("converting segment %s in place" % filename) - with open(filename, 'r+b') as segment: - segment.seek(0) - segment.write(MAGIC) - - def find_attic_keyfile(self): - '''find the attic keyfiles - - the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo - - this is expected to look into $HOME/.attic/keys or - $ATTIC_KEYS_DIR for key files matching the given Borg - repository. - - it is expected to raise an exception (KeyfileNotFoundError) if - no key is found. whether that exception is from Borg or Attic - is unclear. - - this is split in a separate function in case we want to use - the attic code here directly, instead of our local - implementation.''' - return AtticKeyfileKey.find_key_file(self) - - @staticmethod - def convert_keyfiles(keyfile): - - '''convert key files from attic to borg - - replacement pattern is `s/ATTIC KEY/BORG_KEY/` in - `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or - `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or - `$HOME/.borg/keys`. - - no need to decrypt to convert. 
we need to rewrite the whole - key file because magic number length changed, but that's not a - problem because the keyfiles are small (compared to, say, - all the segments).''' - print("converting keyfile %s" % keyfile) - with open(keyfile, 'r') as f: - data = f.read() - data = data.replace(AtticKeyfileKey.FILE_ID, - KeyfileKey.FILE_ID, - 1) - keyfile = os.path.join(get_keys_dir(), - os.path.basename(keyfile)) - print("writing borg keyfile to %s" % keyfile) - with open(keyfile, 'w') as f: - f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) - - def convert_cache(self): - '''convert caches from attic to borg - - those are all hash indexes, so we need to - `s/ATTICIDX/BORG_IDX/` in a few locations: - - * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` - is the `Repository.get_index_transaction_id()`), which we - should probably update, with a lock, see - `Repository.open()`, which i'm not sure we should use - because it may write data on `Repository.close()`... - - * the `files` and `chunks` cache (in - `$HOME/.cache/attic//`), which we could just drop, - but if we'd want to convert, we could open it with the - `Cache.open()`, edit in place and then `Cache.close()` to - make sure we have locking right - ''' - raise NotImplementedException('not implemented') - -class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' - FILE_ID = 'ATTIC KEY' - - # verbatim copy from attic - @staticmethod - def get_keys_dir(): - """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.attic', 'keys')) - - @classmethod - def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ - - this has two small modifications: - - 1. it uses the above `get_keys_dir`_ instead of the global one, - assumed to be borg's - - 2. 
it uses `repository.path`_ instead of - `repository._location.canonical_path`_ because we can't - assume the repository has been opened by the archiver yet - ''' - get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: - return filename - raise KeyfileNotFoundError(repository.path, get_keys_dir()) From 77ed6dec2ba3dcbbec4b4027ffd3313fa03f6905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:27:55 -0400 Subject: [PATCH 10/63] skip converter tests if attic isn't installed --- borg/testsuite/convert.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 74196063e..29b7c49f0 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -3,9 +3,14 @@ import pytest import shutil import tempfile -import attic.repository -import attic.key -import attic.helpers +try: + import attic.repository + import attic.key + import attic.helpers +except ImportError: + attic = None +pytestmark = pytest.mark.skipif(attic is None, + reason = 'cannot find an attic install') from ..converter import AtticRepositoryConverter, NotImplementedException from ..helpers import get_keys_dir From e5543657658b46ef48248697531ca447ec86bcda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:28:07 -0400 Subject: [PATCH 11/63] remove unused import --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index b558af883..a416f3e79 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,7 +1,7 @@ from binascii import hexlify import os -from .helpers import IntegrityError, get_keys_dir +from .helpers 
import get_keys_dir from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError From f35e8e17f2b4b4379bb250d3b495c6c59f734cb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:50:35 -0400 Subject: [PATCH 12/63] add dry run support to converter --- borg/converter.py | 29 ++++++++++++++++------------- borg/testsuite/convert.py | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index a416f3e79..d949fd31a 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -9,7 +9,7 @@ class NotImplementedException(Exception): pass class AtticRepositoryConverter(Repository): - def convert(self): + def convert(self, dryrun=True): '''convert an attic repository to a borg repository those are the files that need to be converted here, from most @@ -23,13 +23,13 @@ class AtticRepositoryConverter(Repository): except KeyfileNotFoundError: print("no key file found for repository") else: - self.convert_keyfiles(keyfile) + self.convert_keyfiles(keyfile, dryrun) self.close() - self.convert_segments(segments) - self.convert_cache() + self.convert_segments(segments, dryrun) + self.convert_cache(dryrun) @staticmethod - def convert_segments(segments): + def convert_segments(segments, dryrun): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -39,6 +39,8 @@ class AtticRepositoryConverter(Repository): replace the 8 first bytes of all regular files in there.''' for filename in segments: print("converting segment %s in place" % filename) + if dryrun: + continue with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) @@ -63,7 +65,7 @@ class AtticRepositoryConverter(Repository): return AtticKeyfileKey.find_key_file(self) @staticmethod - def convert_keyfiles(keyfile): + def convert_keyfiles(keyfile, dryrun): '''convert key files from attic to borg @@ -85,13 +87,14 @@ class 
AtticRepositoryConverter(Repository): keyfile = os.path.join(get_keys_dir(), os.path.basename(keyfile)) print("writing borg keyfile to %s" % keyfile) - with open(keyfile, 'w') as f: - f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) + if not dryrun: + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) - def convert_cache(self): + def convert_cache(self, dryrun): '''convert caches from attic to borg those are all hash indexes, so we need to @@ -109,7 +112,7 @@ class AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right ''' - raise NotImplementedException('not implemented') + raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): '''backwards compatible Attick key file parser''' diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 29b7c49f0..e708ea60d 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -59,7 +59,7 @@ class ConversionTestCase(BaseTestCase): self.repository.close() print("opening attic repository with borg and converting") with pytest.raises(NotImplementedException): - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) From a5f32b0a27c076326500e5b335c6592e083dc130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:50:46 -0400 Subject: [PATCH 13/63] add convert command --- borg/archiver.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 
28f1d8a3f..2c4302b3a 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,6 +17,7 @@ import traceback from . import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER +from .converter import AtticRepositoryConverter, NotImplementedException from .repository import Repository from .cache import Cache from .key import key_creator @@ -462,6 +463,15 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) return self.exit_code + def do_convert(self, parser, commands, args): + '''convert a repository from attic to borg''' + repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) + try: + repo.convert(args.dry_run) + except NotImplementedException as e: + print("warning: %s" % e) + return self.exit_code + helptext = {} helptext['patterns'] = ''' Exclude patterns use a variant of shell pattern syntax, with '*' matching any @@ -896,6 +906,43 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=location_validator(archive=False), help='repository to prune') + convert_epilog = textwrap.dedent(""" + convert will convert an existing Attic repository to Borg in place. + + it will change the magic numbers in the repository's segments + to match the new Borg magic numbers. the keyfiles found in + $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and + copied to $BORG_KEYS_DIR or ~/.borg/keys. + + the cache files are *not* currently converted, which will + result in a much longer backup the first time. you can run + `borg check --repair` to rebuild those files after the + conversion. + + the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic + will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as + the magic numbers will have changed. 
+ + it is recommended you run this on a copy of the Attic + repository, in case something goes wrong, for example: + + cp -a attic borg + borg convert -n borg + borg convert borg + + you have been warned.""") + subparser = subparsers.add_parser('convert', parents=[common_parser], + description=self.do_convert.__doc__, + epilog=convert_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter) + subparser.set_defaults(func=self.do_convert) + subparser.add_argument('-n', '--dry-run', dest='dry_run', + default=False, action='store_true', + help='do not change repository') + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', + type=location_validator(archive=False), + help='path to the attic repository to be converted') + subparser = subparsers.add_parser('help', parents=[common_parser], description='Extra help') subparser.add_argument('--epilog-only', dest='epilog_only', From 1b29699403facffc1396c0741936df3dd8a1c8f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:15:12 -0400 Subject: [PATCH 14/63] cosmetic: reorder --- borg/testsuite/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e708ea60d..d48a0e05b 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -35,6 +35,7 @@ class ConversionTestCase(BaseTestCase): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.attic_repo.close() # we use the repo dir for the created keyfile, because we do # not want to clutter existing keyfiles @@ -46,7 +47,6 @@ class ConversionTestCase(BaseTestCase): os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) - self.attic_repo.close() def tearDown(self): shutil.rmtree(self.tmppath) 
From 1ba856d2b3ff3fdbbd6bc3afb25701a7bcb57c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:15:25 -0400 Subject: [PATCH 15/63] refactor: group test repo subroutine --- borg/testsuite/convert.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index d48a0e05b..cb9f5ec4c 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -51,12 +51,16 @@ class ConversionTestCase(BaseTestCase): def tearDown(self): shutil.rmtree(self.tmppath) - def test_convert(self): + def check_repo(self, state = True): + if not state: + print("this will show an error, this is expected") self.repository = self.open(self.tmppath) - # check should fail because of magic number - print("this will show an error, it is expected") - assert not self.repository.check() # can't check raises() because check() handles the error + assert self.repository.check() is state # can't check raises() because check() handles the error self.repository.close() + + def test_convert(self): + # check should fail because of magic number + self.check_repo(False) print("opening attic repository with borg and converting") with pytest.raises(NotImplementedException): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) @@ -65,6 +69,4 @@ class ConversionTestCase(BaseTestCase): os.path.basename(self.key.path)) with open(keyfile, 'r') as f: assert f.read().startswith(KeyfileKey.FILE_ID) - self.repository = self.open(self.tmppath) - assert self.repository.check() - self.repository.close() + self.check_repo() From bcd94b96e0e2a7932e75557403e08d70d2e7fc94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:32:34 -0400 Subject: [PATCH 16/63] split up keyfile, segments and overall testing in converter --- borg/testsuite/convert.py | 53 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 
deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index cb9f5ec4c..172b308ec 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -12,7 +12,7 @@ except ImportError: pytestmark = pytest.mark.skipif(attic is None, reason = 'cannot find an attic install') -from ..converter import AtticRepositoryConverter, NotImplementedException +from ..converter import AtticRepositoryConverter, NotImplementedException, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -20,10 +20,6 @@ from . import BaseTestCase class ConversionTestCase(BaseTestCase): - class MockArgs: - def __init__(self, path): - self.repository = attic.helpers.Location(path) - def open(self, path, repo_type = Repository, create=False): return repo_type(os.path.join(path, 'repository'), create = create) @@ -37,6 +33,34 @@ class ConversionTestCase(BaseTestCase): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') self.attic_repo.close() + def tearDown(self): + shutil.rmtree(self.tmppath) + + def check_repo(self, state = True): + if not state: + print("this will show an error, this is expected") + repository = self.open(self.tmppath) + assert repository.check() is state # can't check raises() because check() handles the error + repository.close() + + def test_convert_segments(self): + # check should fail because of magic number + self.check_repo(False) + print("opening attic repository with borg and converting") + repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + segments = [ filename for i, filename in repo.io.segment_iterator() ] + repo.close() + repo.convert_segments(segments, dryrun=False) + self.check_repo() + +class EncryptedConversionTestCase(ConversionTestCase): + class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) + + def setUp(self): + super().setUp() + # we use the repo dir for the created keyfile, because we 
do # not want to clutter existing keyfiles os.environ['ATTIC_KEYS_DIR'] = self.tmppath @@ -48,17 +72,18 @@ class ConversionTestCase(BaseTestCase): os.environ['ATTIC_PASSPHRASE'] = 'test' self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) - def tearDown(self): - shutil.rmtree(self.tmppath) + def test_keys(self): + repository = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + keyfile = AtticKeyfileKey.find_key_file(repository) + AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - def check_repo(self, state = True): - if not state: - print("this will show an error, this is expected") - self.repository = self.open(self.tmppath) - assert self.repository.check() is state # can't check raises() because check() handles the error - self.repository.close() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(self.key.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) - def test_convert(self): + def test_convert_all(self): # check should fail because of magic number self.check_repo(False) print("opening attic repository with borg and converting") From c99082922553114c981109178e4b3d5c8c13f195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:51:19 -0400 Subject: [PATCH 17/63] add attic dependency for build as a separate factor this way we don't depend on attic for regular build, but we can still see proper test coverage --- tox.ini | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index d177c121a..8fd697657 100644 --- a/tox.ini +++ b/tox.ini @@ -2,13 +2,15 @@ # fakeroot -u tox --recreate [tox] -envlist = py32, py33, py34, py35 +envlist = py{32,33,34,35}{,-attic} [testenv] # Change dir to avoid import problem for cython code. The directory does # not really matter, should be just different from the toplevel dir. 
changedir = {toxworkdir} -deps = -rrequirements.d/development.txt +deps = + -rrequirements.d/development.txt + attic: attic commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From a81755f1a98f071f668287e49a32964c92466d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:34:18 -0400 Subject: [PATCH 18/63] use triple-double-quoted instead of single-double-quoted at the request of TW, see #231 --- borg/archiver.py | 2 +- borg/converter.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 2c4302b3a..696291f1c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -464,7 +464,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return self.exit_code def do_convert(self, parser, commands, args): - '''convert a repository from attic to borg''' + """convert a repository from attic to borg""" repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) try: repo.convert(args.dry_run) diff --git a/borg/converter.py b/borg/converter.py index d949fd31a..66606095f 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -10,12 +10,12 @@ class NotImplementedException(Exception): class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): - '''convert an attic repository to a borg repository + """convert an attic repository to a borg repository those are the files that need to be converted here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if - missing.''' + missing.""" print("reading segments from attic repository using borg") segments = [ filename for i, filename in self.io.segment_iterator() ] try: @@ -30,13 +30,13 @@ class AtticRepositoryConverter(Repository): @staticmethod def convert_segments(segments, dryrun): - '''convert 
repository segments from attic to borg + """convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there.''' + replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) if dryrun: @@ -46,7 +46,7 @@ class AtticRepositoryConverter(Repository): segment.write(MAGIC) def find_attic_keyfile(self): - '''find the attic keyfiles + """find the attic keyfiles the keyfiles are loaded by `KeyfileKey.find_key_file()`. that finds the keys with the right identifier for the repo @@ -61,13 +61,13 @@ class AtticRepositoryConverter(Repository): this is split in a separate function in case we want to use the attic code here directly, instead of our local - implementation.''' + implementation.""" return AtticKeyfileKey.find_key_file(self) @staticmethod def convert_keyfiles(keyfile, dryrun): - '''convert key files from attic to borg + """convert key files from attic to borg replacement pattern is `s/ATTIC KEY/BORG_KEY/` in `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or @@ -77,7 +77,7 @@ class AtticRepositoryConverter(Repository): no need to decrypt to convert. 
we need to rewrite the whole key file because magic number length changed, but that's not a problem because the keyfiles are small (compared to, say, - all the segments).''' + all the segments).""" print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() @@ -95,7 +95,7 @@ class AtticRepositoryConverter(Repository): assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self, dryrun): - '''convert caches from attic to borg + """convert caches from attic to borg those are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: @@ -111,11 +111,11 @@ class AtticRepositoryConverter(Repository): but if we'd want to convert, we could open it with the `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right - ''' + """ raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' + """backwards compatible Attick key file parser""" FILE_ID = 'ATTIC KEY' # verbatim copy from attic @@ -127,7 +127,7 @@ class AtticKeyfileKey(KeyfileKey): @classmethod def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ + """copy of attic's `find_key_file`_ this has two small modifications: @@ -137,7 +137,7 @@ class AtticKeyfileKey(KeyfileKey): 2. 
it uses `repository.path`_ instead of `repository._location.canonical_path`_ because we can't assume the repository has been opened by the archiver yet - ''' + """ get_keys_dir = cls.get_keys_dir id = hexlify(repository.id).decode('ascii') keys_dir = get_keys_dir() From efbad396f4d90a03f84cad859bedfa9ec169735b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:36:20 -0400 Subject: [PATCH 19/63] help text review: magic s/number/string/, s/can/must/ --- borg/archiver.py | 8 ++++---- borg/converter.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 696291f1c..832983520 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -909,19 +909,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") convert_epilog = textwrap.dedent(""" convert will convert an existing Attic repository to Borg in place. - it will change the magic numbers in the repository's segments - to match the new Borg magic numbers. the keyfiles found in + it will change the magic strings in the repository's segments + to match the new Borg magic strings. the keyfiles found in $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.borg/keys. the cache files are *not* currently converted, which will - result in a much longer backup the first time. you can run + result in a much longer backup the first time. you must run `borg check --repair` to rebuild those files after the conversion. the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as - the magic numbers will have changed. + the magic strings will have changed. 
it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: diff --git a/borg/converter.py b/borg/converter.py index 66606095f..6b35b6cb7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -35,7 +35,7 @@ class AtticRepositoryConverter(Repository): replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. - luckily the segment length didn't change so we can just + luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) @@ -75,7 +75,7 @@ class AtticRepositoryConverter(Repository): `$HOME/.borg/keys`. no need to decrypt to convert. we need to rewrite the whole - key file because magic number length changed, but that's not a + key file because magic string length changed, but that's not a problem because the keyfiles are small (compared to, say, all the segments).""" print("converting keyfile %s" % keyfile) From c2913f5f1052e47872ca1cc3bcc55db3c92123d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:40:56 -0400 Subject: [PATCH 20/63] style: don't use continue for nothing --- borg/converter.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 6b35b6cb7..61c26b3ae 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -39,11 +39,10 @@ class AtticRepositoryConverter(Repository): replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) - if dryrun: - continue - with open(filename, 'r+b') as segment: - segment.seek(0) - segment.write(MAGIC) + if not dryrun: + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) def find_attic_keyfile(self): """find the attic keyfiles From dbd4ac7f8d09265ef468d2531446d42843897d51 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:41:44 -0400 Subject: [PATCH 21/63] add missing colon --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 61c26b3ae..6be3f823f 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -48,7 +48,7 @@ class AtticRepositoryConverter(Repository): """find the attic keyfiles the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo + finds the keys with the right identifier for the repo. this is expected to look into $HOME/.attic/keys or $ATTIC_KEYS_DIR for key files matching the given Borg From 5b8cb63479b1f189a59979417682aa1d56467df4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:43:05 -0400 Subject: [PATCH 22/63] remove duplicate code with the unit test --- borg/converter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 6be3f823f..751791006 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -89,9 +89,6 @@ class AtticRepositoryConverter(Repository): if not dryrun: with open(keyfile, 'w') as f: f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self, dryrun): """convert caches from attic to borg From ef0ed409b683c4dbaede8c75a8c68585aecc449f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:44:17 -0400 Subject: [PATCH 23/63] fix typo --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 751791006..a9a706ad9 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -111,7 +111,7 @@ class AtticRepositoryConverter(Repository): raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class 
AtticKeyfileKey(KeyfileKey): - """backwards compatible Attick key file parser""" + """backwards compatible Attic key file parser""" FILE_ID = 'ATTIC KEY' # verbatim copy from attic From d66516351f0885524a1b8c24375ca19c9d330909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:46:30 -0400 Subject: [PATCH 24/63] use builtin NotImplementedError instead of writing our own NotImplemented didn't work with pytest.raise(), i didn't know about NotImplementedError, thanks tw --- borg/archiver.py | 4 ++-- borg/converter.py | 5 +---- borg/testsuite/convert.py | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 832983520..5c08880d6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,7 +17,7 @@ import traceback from . import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER -from .converter import AtticRepositoryConverter, NotImplementedException +from .converter import AtticRepositoryConverter from .repository import Repository from .cache import Cache from .key import key_creator @@ -468,7 +468,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) try: repo.convert(args.dry_run) - except NotImplementedException as e: + except NotImplementedError as e: print("warning: %s" % e) return self.exit_code diff --git a/borg/converter.py b/borg/converter.py index a9a706ad9..8261d9281 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -5,9 +5,6 @@ from .helpers import get_keys_dir from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError -class NotImplementedException(Exception): - pass - class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -108,7 +105,7 @@ class 
AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') + raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 172b308ec..208f6604e 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -12,7 +12,7 @@ except ImportError: pytestmark = pytest.mark.skipif(attic is None, reason = 'cannot find an attic install') -from ..converter import AtticRepositoryConverter, NotImplementedException, AtticKeyfileKey +from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -87,7 +87,7 @@ class EncryptedConversionTestCase(ConversionTestCase): # check should fail because of magic number self.check_repo(False) print("opening attic repository with borg and converting") - with pytest.raises(NotImplementedException): + with pytest.raises(NotImplementedError): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), From d5198c551b1d650f60e1b520eb672a8f5b5fb7f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:47:23 -0400 Subject: [PATCH 25/63] split out depends in imports --- borg/testsuite/convert.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 208f6604e..3a413072e 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,8 +1,9 @@ import os -import pytest import shutil import tempfile 
+import pytest + try: import attic.repository import attic.key From 5f6eb87385e0945022582b7a2f160794eedb52b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:50:06 -0400 Subject: [PATCH 26/63] much nicer validation checking --- borg/testsuite/convert.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 3a413072e..155e568c3 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -37,22 +37,21 @@ class ConversionTestCase(BaseTestCase): def tearDown(self): shutil.rmtree(self.tmppath) - def check_repo(self, state = True): - if not state: - print("this will show an error, this is expected") + def repo_valid(self,): repository = self.open(self.tmppath) - assert repository.check() is state # can't check raises() because check() handles the error + state = repository.check() # can't check raises() because check() handles the error repository.close() + return state def test_convert_segments(self): # check should fail because of magic number - self.check_repo(False) + assert not self.repo_valid() print("opening attic repository with borg and converting") repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) segments = [ filename for i, filename in repo.io.segment_iterator() ] repo.close() repo.convert_segments(segments, dryrun=False) - self.check_repo() + assert self.repo_valid() class EncryptedConversionTestCase(ConversionTestCase): class MockArgs: @@ -86,7 +85,7 @@ class EncryptedConversionTestCase(ConversionTestCase): def test_convert_all(self): # check should fail because of magic number - self.check_repo(False) + assert not self.repo_valid() print("opening attic repository with borg and converting") with pytest.raises(NotImplementedError): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) @@ -95,4 +94,4 @@ class EncryptedConversionTestCase(ConversionTestCase): 
os.path.basename(self.key.path)) with open(keyfile, 'r') as f: assert f.read().startswith(KeyfileKey.FILE_ID) - self.check_repo() + assert self.repo_valid() From 4a85f2d0f54fa236d792cce2a1a4f96fca13dfd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:58:00 -0400 Subject: [PATCH 27/63] fix most pep8 warnings * limit all lines to 80 chars * remove spaces around parameters * missing blank lines --- borg/testsuite/convert.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 155e568c3..1943b5df2 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -11,7 +11,7 @@ try: except ImportError: attic = None pytestmark = pytest.mark.skipif(attic is None, - reason = 'cannot find an attic install') + reason='cannot find an attic install') from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir @@ -19,17 +19,18 @@ from ..key import KeyfileKey from ..repository import Repository, MAGIC from . 
import BaseTestCase + class ConversionTestCase(BaseTestCase): - def open(self, path, repo_type = Repository, create=False): - return repo_type(os.path.join(path, 'repository'), create = create) + def open(self, path, repo_type=Repository, create=False): + return repo_type(os.path.join(path, 'repository'), create=create) def setUp(self): self.tmppath = tempfile.mkdtemp() self.attic_repo = self.open(self.tmppath, - repo_type = attic.repository.Repository, - create = True) - # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ + repo_type=attic.repository.Repository, + create=True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') self.attic_repo.close() @@ -39,7 +40,8 @@ class ConversionTestCase(BaseTestCase): def repo_valid(self,): repository = self.open(self.tmppath) - state = repository.check() # can't check raises() because check() handles the error + # can't check raises() because check() handles the error + state = repository.check() repository.close() return state @@ -47,12 +49,13 @@ class ConversionTestCase(BaseTestCase): # check should fail because of magic number assert not self.repo_valid() print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) - segments = [ filename for i, filename in repo.io.segment_iterator() ] + repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) + segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) assert self.repo_valid() + class EncryptedConversionTestCase(ConversionTestCase): class MockArgs: def __init__(self, path): @@ -70,10 +73,12 @@ class EncryptedConversionTestCase(ConversionTestCase): # about anyways. in real runs, the original key will be retained. 
os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) + self.key = attic.key.KeyfileKey.create(self.attic_repo, + self.MockArgs(self.tmppath)) def test_keys(self): - repository = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + repository = self.open(self.tmppath, + repo_type=AtticRepositoryConverter) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) @@ -87,8 +92,9 @@ class EncryptedConversionTestCase(ConversionTestCase): # check should fail because of magic number assert not self.repo_valid() print("opening attic repository with borg and converting") + repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) with pytest.raises(NotImplementedError): - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) + repo.convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) From b9c474d1877190ef73e295c46ac8b7ae58a803cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:59:01 -0400 Subject: [PATCH 28/63] pep8: put pytest skip marker after imports --- borg/testsuite/convert.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 1943b5df2..08472ef93 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -10,8 +10,6 @@ try: import attic.helpers except ImportError: attic = None -pytestmark = pytest.mark.skipif(attic is None, - reason='cannot find an attic install') from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir @@ -19,6 +17,9 @@ from ..key import KeyfileKey from ..repository import Repository, MAGIC from . 
import BaseTestCase +pytestmark = pytest.mark.skipif(attic is None, + reason='cannot find an attic install') + class ConversionTestCase(BaseTestCase): From 79d9aebaf2e0f1b533f81815b4eefde20ba9938a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:00:49 -0400 Subject: [PATCH 29/63] use permanently instead of irrevocably, which is less common --- borg/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5c08880d6..5c33b5f7b 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -919,7 +919,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") `borg check --repair` to rebuild those files after the conversion. - the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic + the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as the magic strings will have changed. From 57801a288d43c96e9a93894334a61e6ffc6c89f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:03:41 -0400 Subject: [PATCH 30/63] keep tests simple by always adding attic depends note that we do not depend on attic to build borg, just to do those tests. if attic goes away, we could eventually do this another way or just stop testing this altogether. --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 8fd697657..a9ccb5e04 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ # fakeroot -u tox --recreate [tox] -envlist = py{32,33,34,35}{,-attic} +envlist = py{32,33,34,35} [testenv] # Change dir to avoid import problem for cython code. 
The directory does @@ -10,7 +10,7 @@ envlist = py{32,33,34,35}{,-attic} changedir = {toxworkdir} deps = -rrequirements.d/development.txt - attic: attic + attic commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From 58815bc28a795bf4a77a288c4edbda7b32c004f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:23:17 -0400 Subject: [PATCH 31/63] fix commandline dispatch for converter --- borg/archiver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5c33b5f7b..02c6ea781 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -463,9 +463,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) return self.exit_code - def do_convert(self, parser, commands, args): + def do_convert(self, args): """convert a repository from attic to borg""" - repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) + repo = AtticRepositoryConverter(args.repository.path, create=False) try: repo.convert(args.dry_run) except NotImplementedError as e: From 98e4e6bc253f067cc5c45f046073d179e2d668d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:35:17 -0400 Subject: [PATCH 32/63] lock repository when converting segments --- borg/converter.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/borg/converter.py b/borg/converter.py index 8261d9281..99de15170 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -2,6 +2,7 @@ from binascii import hexlify import os from .helpers import get_keys_dir +from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError @@ -22,7 +23,12 @@ class AtticRepositoryConverter(Repository): else: self.convert_keyfiles(keyfile, dryrun) self.close() + # partial open: just hold on to the lock + self.lock = 
UpgradableLock(os.path.join(self.path, 'lock'), + exclusive=True).acquire() self.convert_segments(segments, dryrun) + self.lock.release() + self.lock = None self.convert_cache(dryrun) @staticmethod @@ -34,6 +40,7 @@ class AtticRepositoryConverter(Repository): luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" + print("converting %d segments..." % len(segments)) for filename in segments: print("converting segment %s in place" % filename) if not dryrun: From f5cb0f4e731bf63b5a7c0795eb612c01b95ac7ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 10:41:31 -0400 Subject: [PATCH 33/63] rewrite convert tests with pytest fixtures --- borg/testsuite/convert.py | 140 ++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 73 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 08472ef93..ac7d6cbca 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -15,90 +15,84 @@ from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC -from . 
import BaseTestCase pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') +def repo_open(path, repo_type=Repository, create=False): + return repo_type(os.path.join(str(path), 'repository'), create=create) -class ConversionTestCase(BaseTestCase): +def repo_valid(path): + repository = repo_open(str(path)) + # can't check raises() because check() handles the error + state = repository.check() + repository.close() + return state - def open(self, path, repo_type=Repository, create=False): - return repo_type(os.path.join(path, 'repository'), create=create) +@pytest.fixture(autouse=True) +def attic_repo(tmpdir): + attic_repo = repo_open(str(tmpdir), + repo_type=attic.repository.Repository, + create=True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1` + for x in range(100): + attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + attic_repo.close() + return attic_repo - def setUp(self): - self.tmppath = tempfile.mkdtemp() - self.attic_repo = self.open(self.tmppath, - repo_type=attic.repository.Repository, - create=True) - # throw some stuff in that repo, copied from `RepositoryTestCase.test1` - for x in range(100): - self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - self.attic_repo.close() +@pytest.mark.usefixtures("tmpdir") +def test_convert_segments(tmpdir, attic_repo): + # check should fail because of magic number + assert not repo_valid(tmpdir) + print("opening attic repository with borg and converting") + repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + segments = [filename for i, filename in repo.io.segment_iterator()] + repo.close() + repo.convert_segments(segments, dryrun=False) + assert repo_valid(tmpdir) - def tearDown(self): - shutil.rmtree(self.tmppath) +class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) - def repo_valid(self,): - repository = self.open(self.tmppath) - # can't check raises() because check() handles the error - 
state = repository.check() - repository.close() - return state +@pytest.fixture() +def attic_key_file(attic_repo, tmpdir): + keys_dir = str(tmpdir.mkdir('keys')) - def test_convert_segments(self): - # check should fail because of magic number - assert not self.repo_valid() - print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) - segments = [filename for i, filename in repo.io.segment_iterator()] - repo.close() - repo.convert_segments(segments, dryrun=False) - assert self.repo_valid() + # we use the repo dir for the created keyfile, because we do + # not want to clutter existing keyfiles + os.environ['ATTIC_KEYS_DIR'] = keys_dir + # we use the same directory for the converted files, which + # will clutter the previously created one, which we don't care + # about anyways. in real runs, the original key will be retained. + os.environ['BORG_KEYS_DIR'] = keys_dir + os.environ['ATTIC_PASSPHRASE'] = 'test' + return attic.key.KeyfileKey.create(attic_repo, + MockArgs(keys_dir)) -class EncryptedConversionTestCase(ConversionTestCase): - class MockArgs: - def __init__(self, path): - self.repository = attic.helpers.Location(path) +def test_keys(tmpdir, attic_repo, attic_key_file): + repository = repo_open(tmpdir, + repo_type=AtticRepositoryConverter) + keyfile = AtticKeyfileKey.find_key_file(repository) + AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - def setUp(self): - super().setUp() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(attic_key_file.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) - # we use the repo dir for the created keyfile, because we do - # not want to clutter existing keyfiles - os.environ['ATTIC_KEYS_DIR'] = self.tmppath - - # we use the same directory for the converted files, which - # will clutter the previously created one, which we don't care - # about anyways. 
in real runs, the original key will be retained. - os.environ['BORG_KEYS_DIR'] = self.tmppath - os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, - self.MockArgs(self.tmppath)) - - def test_keys(self): - repository = self.open(self.tmppath, - repo_type=AtticRepositoryConverter) - keyfile = AtticKeyfileKey.find_key_file(repository) - AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(self.key.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) - - def test_convert_all(self): - # check should fail because of magic number - assert not self.repo_valid() - print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) - with pytest.raises(NotImplementedError): - repo.convert(dryrun=False) - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(self.key.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) - assert self.repo_valid() +def test_convert_all(tmpdir, attic_repo, attic_key_file): + # check should fail because of magic number + assert not repo_valid(tmpdir) + print("opening attic repository with borg and converting") + repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + with pytest.raises(NotImplementedError): + repo.convert(dryrun=False) + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(attic_key_file.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) + assert repo_valid(tmpdir) From a08bcb21aee3594287551ec4b1e8e8c119c8f65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:10:00 -0400 Subject: [PATCH 34/63] refactor common code we get rid of repo_open() which doesn't same much 
typing, and add a validator for keys --- borg/testsuite/convert.py | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index ac7d6cbca..e3f9be5d1 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -19,21 +19,22 @@ from ..repository import Repository, MAGIC pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') -def repo_open(path, repo_type=Repository, create=False): - return repo_type(os.path.join(str(path), 'repository'), create=create) - def repo_valid(path): - repository = repo_open(str(path)) + repository = Repository(str(path), create=False) # can't check raises() because check() handles the error state = repository.check() repository.close() return state +def key_valid(path): + keyfile = os.path.join(get_keys_dir(), + os.path.basename(path)) + with open(keyfile, 'r') as f: + return f.read().startswith(KeyfileKey.FILE_ID) + @pytest.fixture(autouse=True) def attic_repo(tmpdir): - attic_repo = repo_open(str(tmpdir), - repo_type=attic.repository.Repository, - create=True) + attic_repo = attic.repository.Repository(str(tmpdir), create=True) # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') @@ -45,7 +46,7 @@ def test_convert_segments(tmpdir, attic_repo): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + repo = AtticRepositoryConverter(str(tmpdir), create=False) segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) @@ -72,27 +73,17 @@ def attic_key_file(attic_repo, tmpdir): MockArgs(keys_dir)) def test_keys(tmpdir, attic_repo, attic_key_file): - repository = repo_open(tmpdir, - 
repo_type=AtticRepositoryConverter) + repository = AtticRepositoryConverter(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(attic_key_file.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) + assert key_valid(attic_key_file.path) def test_convert_all(tmpdir, attic_repo, attic_key_file): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + repo = AtticRepositoryConverter(str(tmpdir), create=False) with pytest.raises(NotImplementedError): repo.convert(dryrun=False) - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(attic_key_file.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) + assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) From 7f6fd1f30686ffcb68bbfa87ba75978d691b8b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:11:30 -0400 Subject: [PATCH 35/63] add docs for all converter test code --- borg/testsuite/convert.py | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e3f9be5d1..cc85dfca3 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -20,6 +20,12 @@ pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') def repo_valid(path): + """ + utility function to check if borg can open a repository + + :param path: the path to the repository + :returns: if borg can check the repository + """ repository = Repository(str(path), create=False) # can't check raises() because check() handles the error 
state = repository.check() @@ -27,6 +33,12 @@ def repo_valid(path): return state def key_valid(path): + """ + check that the new keyfile is alright + + :param path: the path to the key file + :returns: if the file starts with the borg magic string + """ keyfile = os.path.join(get_keys_dir(), os.path.basename(path)) with open(keyfile, 'r') as f: @@ -34,6 +46,12 @@ def key_valid(path): @pytest.fixture(autouse=True) def attic_repo(tmpdir): + """ + create an attic repo with some stuff in it + + :param tmpdir: path to the repository to be created + :returns: a attic.repository.Repository object + """ attic_repo = attic.repository.Repository(str(tmpdir), create=True) # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): @@ -43,6 +61,16 @@ def attic_repo(tmpdir): @pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): + """test segment conversion + + this will load the given attic repository, list all the segments + then convert them one at a time. 
we need to close the repo before + conversion otherwise we have errors from borg + + :param tmpdir: a temporary directory to run the test in (builtin + fixture) + :param attic_repo: a populated attic repository (fixture) + """ # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") @@ -53,11 +81,27 @@ def test_convert_segments(tmpdir, attic_repo): assert repo_valid(tmpdir) class MockArgs: + """ + mock attic location + + this is used to simulate a key location with a properly loaded + repository object to create a key file + """ def __init__(self, path): self.repository = attic.helpers.Location(path) @pytest.fixture() def attic_key_file(attic_repo, tmpdir): + """ + create an attic key file from the given repo, in the keys + subdirectory of the given tmpdir + + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param tmpdir: a temporary directory (a builtin fixture) + :returns: the KeyfileKey object as returned by + attic.key.KeyfileKey.create() + """ keys_dir = str(tmpdir.mkdir('keys')) # we use the repo dir for the created keyfile, because we do @@ -73,12 +117,34 @@ def attic_key_file(attic_repo, tmpdir): MockArgs(keys_dir)) def test_keys(tmpdir, attic_repo, attic_key_file): + """test key conversion + + test that we can convert the given key to a properly formatted + borg key. assumes that the ATTIC_KEYS_DIR and BORG_KEYS_DIR have + been properly populated by the attic_key_file fixture. 
+ + :param tmpdir: a temporary directory (a builtin fixture) + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + """ repository = AtticRepositoryConverter(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) def test_convert_all(tmpdir, attic_repo, attic_key_file): + """test all conversion steps + + this runs everything. mostly redundant test, since everything is + done above. yet we expect a NotImplementedError because we do not + convert caches yet. + + :param tmpdir: a temporary directory (a builtin fixture) + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + """ # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") From 6c318a0f273e522851100f7094a961396f4743e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:12:23 -0400 Subject: [PATCH 36/63] re-pep8 --- borg/testsuite/convert.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index cc85dfca3..5596f4e65 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -19,6 +19,7 @@ from ..repository import Repository, MAGIC pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') + def repo_valid(path): """ utility function to check if borg can open a repository @@ -32,6 +33,7 @@ def repo_valid(path): repository.close() return state + def key_valid(path): """ check that the new keyfile is alright @@ -44,6 +46,7 @@ def key_valid(path): with open(keyfile, 'r') as f: return f.read().startswith(KeyfileKey.FILE_ID) + @pytest.fixture(autouse=True) def 
attic_repo(tmpdir): """ @@ -59,6 +62,7 @@ def attic_repo(tmpdir): attic_repo.close() return attic_repo + @pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): """test segment conversion @@ -80,6 +84,7 @@ def test_convert_segments(tmpdir, attic_repo): repo.convert_segments(segments, dryrun=False) assert repo_valid(tmpdir) + class MockArgs: """ mock attic location @@ -90,6 +95,7 @@ class MockArgs: def __init__(self, path): self.repository = attic.helpers.Location(path) + @pytest.fixture() def attic_key_file(attic_repo, tmpdir): """ @@ -116,6 +122,7 @@ def attic_key_file(attic_repo, tmpdir): return attic.key.KeyfileKey.create(attic_repo, MockArgs(keys_dir)) + def test_keys(tmpdir, attic_repo, attic_key_file): """test key conversion @@ -133,6 +140,7 @@ def test_keys(tmpdir, attic_repo, attic_key_file): AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) + def test_convert_all(tmpdir, attic_repo, attic_key_file): """test all conversion steps From 946aca97a1ce48e94e5c00be146e58b3f2f5a28a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:25:02 -0400 Subject: [PATCH 37/63] avoid flooding the console instead we add progress information --- borg/converter.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 99de15170..b662c1a32 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,5 +1,6 @@ from binascii import hexlify import os +import time from .helpers import get_keys_dir from .locking import UpgradableLock @@ -41,12 +42,17 @@ class AtticRepositoryConverter(Repository): luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" print("converting %d segments..." 
% len(segments)) + i = 0 for filename in segments: - print("converting segment %s in place" % filename) - if not dryrun: + print("\rconverting segment %s in place (%d/%d)" % (filename, i, len(segments)), end='') + i += 1 + if dryrun: + time.sleep(0.001) + else: with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) + print() def find_attic_keyfile(self): """find the attic keyfiles From 0d457bc8466e9fbbdb7f069f8707ea93333ce4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:25:12 -0400 Subject: [PATCH 38/63] clarify what to do about the cache warning --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index b662c1a32..04dd911a7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -118,7 +118,7 @@ class AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches') + raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches. use borg check --repair to rebuild now') class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" From 3bb3bd45fc1074a840b5c60dff391c92d6981074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 12:36:53 -0400 Subject: [PATCH 39/63] add percentage progress --- borg/converter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 04dd911a7..f32187fe8 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -44,7 +44,8 @@ class AtticRepositoryConverter(Repository): print("converting %d segments..." 
% len(segments)) i = 0 for filename in segments: - print("\rconverting segment %s in place (%d/%d)" % (filename, i, len(segments)), end='') + print("\rconverting segment %d/%d in place, %.2f%% done (%s)" + % (i, len(segments), float(i)/len(segments), filename), end='') i += 1 if dryrun: time.sleep(0.001) From 6a72252b69e0ef07b9e0c54b669e0a762f4f233d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:22:29 -0400 Subject: [PATCH 40/63] release lock properly if segment conversion crashes --- borg/converter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index f32187fe8..7e8e2f75e 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -27,9 +27,11 @@ class AtticRepositoryConverter(Repository): # partial open: just hold on to the lock self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire() - self.convert_segments(segments, dryrun) - self.lock.release() - self.lock = None + try: + self.convert_segments(segments, dryrun) + finally: + self.lock.release() + self.lock = None self.convert_cache(dryrun) @staticmethod From 180dfcb18f87555d2a1c555c9af28ed7061e3afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:23:43 -0400 Subject: [PATCH 41/63] remove needless indentation --- borg/converter.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 7e8e2f75e..a65f887e0 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -93,11 +93,8 @@ class AtticRepositoryConverter(Repository): print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() - data = data.replace(AtticKeyfileKey.FILE_ID, - KeyfileKey.FILE_ID, - 1) - keyfile = os.path.join(get_keys_dir(), - os.path.basename(keyfile)) + data = data.replace(AtticKeyfileKey.FILE_ID, KeyfileKey.FILE_ID, 1) + keyfile = os.path.join(get_keys_dir(), 
os.path.basename(keyfile)) print("writing borg keyfile to %s" % keyfile) if not dryrun: with open(keyfile, 'w') as f: From 35b219597f1a1a9ce85a7f676d0513959699a1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:28:49 -0400 Subject: [PATCH 42/63] only write magic num if necessary this could allow speeding up conversions resumed after interruption --- borg/converter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index a65f887e0..89f912a7d 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -7,6 +7,8 @@ from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError +ATTIC_MAGIC = b'ATTICSEG' + class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -54,7 +56,10 @@ class AtticRepositoryConverter(Repository): else: with open(filename, 'r+b') as segment: segment.seek(0) - segment.write(MAGIC) + # only write if necessary + if (segment.read(len(ATTIC_MAGIC)) == ATTIC_MAGIC): + segment.seek(0) + segment.write(MAGIC) print() def find_attic_keyfile(self): From a7902e56575b3bcddea7057dd2c4a06d8c63cf2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:29:09 -0400 Subject: [PATCH 43/63] cosmetic: show 100% when done, not n-1/n% --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 89f912a7d..573411584 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -48,9 +48,9 @@ class AtticRepositoryConverter(Repository): print("converting %d segments..." 
% len(segments)) i = 0 for filename in segments: + i += 1 print("\rconverting segment %d/%d in place, %.2f%% done (%s)" % (i, len(segments), float(i)/len(segments), filename), end='') - i += 1 if dryrun: time.sleep(0.001) else: From 7c32f555ac45fb02e0c821d697e43976c005cdd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 15:43:16 -0400 Subject: [PATCH 44/63] repository index conversion --- borg/converter.py | 32 ++++++++++++++++++++++++-------- borg/testsuite/convert.py | 5 +++-- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 573411584..899979900 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -18,6 +18,8 @@ class AtticRepositoryConverter(Repository): caches, the latter being optional, as they will be rebuilt if missing.""" print("reading segments from attic repository using borg") + # we need to open it to load the configuration and other fields + self.open(self.path, exclusive=False) segments = [ filename for i, filename in self.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() @@ -31,10 +33,10 @@ class AtticRepositoryConverter(Repository): exclusive=True).acquire() try: self.convert_segments(segments, dryrun) + self.convert_cache(dryrun) finally: self.lock.release() self.lock = None - self.convert_cache(dryrun) @staticmethod def convert_segments(segments, dryrun): @@ -54,14 +56,19 @@ class AtticRepositoryConverter(Repository): if dryrun: time.sleep(0.001) else: - with open(filename, 'r+b') as segment: - segment.seek(0) - # only write if necessary - if (segment.read(len(ATTIC_MAGIC)) == ATTIC_MAGIC): - segment.seek(0) - segment.write(MAGIC) + AtticRepositoryConverter.header_replace(filename, ATTIC_MAGIC, MAGIC) print() + @staticmethod + def header_replace(filename, old_magic, new_magic): + print("changing header on %s" % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + # only write if necessary + if 
(segment.read(len(old_magic)) == old_magic): + segment.seek(0) + segment.write(new_magic) + def find_attic_keyfile(self): """find the attic keyfiles @@ -123,7 +130,16 @@ class AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches. use borg check --repair to rebuild now') + caches = [] + transaction_id = self.get_index_transaction_id() + if transaction_id is None: + print('no index file found for repository %s' % self.path) + else: + caches += [os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')] + for cache in caches: + print("converting cache %s" % cache) + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 5596f4e65..b57e77097 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -59,6 +59,7 @@ def attic_repo(tmpdir): # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + attic_repo.commit() attic_repo.close() return attic_repo @@ -82,6 +83,7 @@ def test_convert_segments(tmpdir, attic_repo): segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) + repo.convert_cache(dryrun=False) assert repo_valid(tmpdir) @@ -157,7 +159,6 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file): assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") repo = AtticRepositoryConverter(str(tmpdir), create=False) - with pytest.raises(NotImplementedError): - repo.convert(dryrun=False) + repo.convert(dryrun=False) assert key_valid(attic_key_file.path) assert 
repo_valid(tmpdir) From 022de5be47174b6017152b60577ab54c9b309a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:01:01 -0400 Subject: [PATCH 45/63] untested file/chunks cache conversion i couldn't figure out how to generate a cache set directly, Archiver is a pain... --- borg/converter.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 899979900..8436f9486 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,8 +1,9 @@ from binascii import hexlify import os +import shutil import time -from .helpers import get_keys_dir +from .helpers import get_keys_dir, get_cache_dir from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError @@ -124,7 +125,7 @@ class AtticRepositoryConverter(Repository): `Repository.open()`, which i'm not sure we should use because it may write data on `Repository.close()`... 
- * the `files` and `chunks` cache (in + * the `files` and `chunks` cache (in `$ATTIC_CACHE_DIR` or `$HOME/.cache/attic//`), which we could just drop, but if we'd want to convert, we could open it with the `Cache.open()`, edit in place and then `Cache.close()` to @@ -136,6 +137,20 @@ class AtticRepositoryConverter(Repository): print('no index file found for repository %s' % self.path) else: caches += [os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')] + + # copy of attic's get_cache_dir() + attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', + os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + + # XXX: untested, because generating cache files is a PITA, see + # Archiver.do_create() for proof + for cache in [ 'files', 'chunks' ]: + attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) + if os.path.exists(attic_cache): + borg_cache = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'), cache) + shutil.copy(attic_cache, borg_cache) + caches += [borg_cache] + for cache in caches: print("converting cache %s" % cache) AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') From 4f9a411ad843469133426c4eea5c4815198a8777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:01:17 -0400 Subject: [PATCH 46/63] remove unneeded fixture decorator --- borg/testsuite/convert.py | 1 - 1 file changed, 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index b57e77097..ceb3efb11 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -64,7 +64,6 @@ def attic_repo(tmpdir): return attic_repo -@pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): """test segment conversion From 28a033d1d35555a2b46b4a50edb010544cf5e749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:03:52 -0400 Subject: [PATCH 47/63] remove debug output that clobbers segment spinner 
--- borg/converter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 8436f9486..27d174b47 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -62,7 +62,6 @@ class AtticRepositoryConverter(Repository): @staticmethod def header_replace(filename, old_magic, new_magic): - print("changing header on %s" % filename) with open(filename, 'r+b') as segment: segment.seek(0) # only write if necessary From 55f79b4999429c10cb99d154bb667fc781986629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:24:28 -0400 Subject: [PATCH 48/63] complete cache conversion code we need to create the borg cache directory dry run was ignored, fixed. process cache before segment, because we want to do the faster stuff first --- borg/converter.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 27d174b47..ced2b409d 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -17,7 +17,11 @@ class AtticRepositoryConverter(Repository): those are the files that need to be converted here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if - missing.""" + missing. + + we nevertheless do the order in reverse, as we prefer to do + the fast stuff first, to improve interactivity. 
+ """ print("reading segments from attic repository using borg") # we need to open it to load the configuration and other fields self.open(self.path, exclusive=False) @@ -33,8 +37,8 @@ class AtticRepositoryConverter(Repository): self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire() try: - self.convert_segments(segments, dryrun) self.convert_cache(dryrun) + self.convert_segments(segments, dryrun) finally: self.lock.release() self.lock = None @@ -146,13 +150,21 @@ class AtticRepositoryConverter(Repository): for cache in [ 'files', 'chunks' ]: attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) if os.path.exists(attic_cache): - borg_cache = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'), cache) - shutil.copy(attic_cache, borg_cache) + borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) + if not os.path.exists(borg_cache_dir): + os.makedirs(borg_cache_dir) + borg_cache = os.path.join(borg_cache_dir, cache) + print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) + if not dryrun: + shutil.copy(attic_cache, borg_cache) caches += [borg_cache] + else: + print("no %s cache found in %s" % (cache, attic_cache)) for cache in caches: print("converting cache %s" % cache) - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + if not dryrun: + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From 8022e563a9316883636cbfe3243d0a24277a111f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:27:19 -0400 Subject: [PATCH 49/63] don't clobber existing borg cache --- borg/converter.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index ced2b409d..456647732 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -154,10 +154,13 @@ class AtticRepositoryConverter(Repository): if 
not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) borg_cache = os.path.join(borg_cache_dir, cache) - print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) - if not dryrun: - shutil.copy(attic_cache, borg_cache) - caches += [borg_cache] + if os.path.exists(borg_cache): + print("borg cache already exists in %s, skipping conversion of %s" % (borg_cache, attic_cache)) + else: + print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) + if not dryrun: + shutil.copyfile(attic_cache, borg_cache) + caches += [borg_cache] else: print("no %s cache found in %s" % (cache, attic_cache)) From 3e7fa0d63339d49b04792bac3f050c42c3e1cba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:41:17 -0400 Subject: [PATCH 50/63] also copy the cache config file to workaround #234 --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 456647732..2657a27a3 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -147,7 +147,7 @@ class AtticRepositoryConverter(Repository): # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks' ]: + for cache in [ 'files', 'chunks', 'config' ]: attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) if os.path.exists(attic_cache): borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) From 081b91bea016b43f569e66a681366514af5b0f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:43:10 -0400 Subject: [PATCH 51/63] remove needless paren --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 2657a27a3..db4fca8f7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -69,7 +69,7 @@ class AtticRepositoryConverter(Repository): with open(filename, 'r+b') as 
segment: segment.seek(0) # only write if necessary - if (segment.read(len(old_magic)) == old_magic): + if segment.read(len(old_magic)) == old_magic: segment.seek(0) segment.write(new_magic) From 41e9942efea82394585bd3ddae4bf995dc31c8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:43:51 -0400 Subject: [PATCH 52/63] follow naming of tested module --- borg/testsuite/{convert.py => converter.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename borg/testsuite/{convert.py => converter.py} (100%) diff --git a/borg/testsuite/convert.py b/borg/testsuite/converter.py similarity index 100% rename from borg/testsuite/convert.py rename to borg/testsuite/converter.py From d4d1b414b5c7b53ba37f32d7cb5ed8a15ffd6b68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:44:53 -0400 Subject: [PATCH 53/63] remove needless autouse --- borg/testsuite/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/converter.py b/borg/testsuite/converter.py index ceb3efb11..b7e3748e9 100644 --- a/borg/testsuite/converter.py +++ b/borg/testsuite/converter.py @@ -47,7 +47,7 @@ def key_valid(path): return f.read().startswith(KeyfileKey.FILE_ID) -@pytest.fixture(autouse=True) +@pytest.fixture() def attic_repo(tmpdir): """ create an attic repo with some stuff in it From 69040588cdf7a7ed7630302378209ad71bf1d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:10:43 -0400 Subject: [PATCH 54/63] update docs to reflect that cache is converted --- borg/archiver.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 02c6ea781..041f44260 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -914,10 +914,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or 
~/.borg/keys. - the cache files are *not* currently converted, which will - result in a much longer backup the first time. you must run - `borg check --repair` to rebuild those files after the - conversion. + the cache files are converted, but the cache layout between Borg + and Attic changed, so it is possible the first backup after the + conversion takes longer than expected due to the cache resync. the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as From ad85f64842a95f37445faad2d4bd6d5323100323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:10:50 -0400 Subject: [PATCH 55/63] whitespace --- borg/archiver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 041f44260..357bdad86 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -925,9 +925,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: - cp -a attic borg - borg convert -n borg - borg convert borg + cp -a attic borg + borg convert -n borg + borg convert borg you have been warned.""") subparser = subparsers.add_parser('convert', parents=[common_parser], From ea5d00436c723d09769cccc618eed4f69585d73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:12:13 -0400 Subject: [PATCH 56/63] also document the cache locations --- borg/archiver.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 357bdad86..2f5325257 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -914,9 +914,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.borg/keys. 
- the cache files are converted, but the cache layout between Borg - and Attic changed, so it is possible the first backup after the - conversion takes longer than expected due to the cache resync. + the cache files are converted, from $ATTIC_CACHE_DIR or + ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the + cache layout between Borg and Attic changed, so it is possible + the first backup after the conversion takes longer than expected + due to the cache resync. the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as From 2c66e7c23373cd2ee04ae7199d391d39a5a51a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 10:49:29 -0400 Subject: [PATCH 57/63] make percentage a real percentage --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index db4fca8f7..7f4127cd8 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -57,7 +57,7 @@ class AtticRepositoryConverter(Repository): for filename in segments: i += 1 print("\rconverting segment %d/%d in place, %.2f%% done (%s)" - % (i, len(segments), float(i)/len(segments), filename), end='') + % (i, len(segments), 100*float(i)/len(segments), filename), end='') if dryrun: time.sleep(0.001) else: From 3773681f00c030b0deff0c503d3d94577b9f32a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:07:37 -0400 Subject: [PATCH 58/63] rewire cache copy mechanisms we separate the conversion and the copy in order to be able to copy arbitrary files from attic without converting them. 
this allows us to copy the config file cleanly without attempting to rewrite its magic number --- borg/converter.py | 56 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 7f4127cd8..39fe3788a 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -144,25 +144,49 @@ class AtticRepositoryConverter(Repository): # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii')) + borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) + + def copy_cache_file(file): + """copy the given attic cache file into the borg directory + + does nothing if dryrun is True. also expects + attic_cache_dir and borg_cache_dir to be set in the parent + scope, to the directories path including the repository + identifier. + + :params file: the basename of the cache file to copy + (example: "files" or "chunks") as a string + + :returns: the borg file that was created or None if non + was created. 
+ + """ + attic_file = os.path.join(attic_cache_dir, file) + if os.path.exists(attic_file): + borg_file = os.path.join(borg_cache_dir, file) + if os.path.exists(borg_file): + print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file)) + else: + print("copying attic cache file from %s to %s" % (attic_file, borg_file)) + if not dryrun: + shutil.copyfile(attic_file, borg_file) + return borg_file + else: + print("no %s cache file found in %s" % (file, attic_file)) + return None + + if os.path.exists(attic_cache_dir): + if not os.path.exists(borg_cache_dir): + os.makedirs(borg_cache_dir) + copy_cache_file('config') # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks', 'config' ]: - attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) - if os.path.exists(attic_cache): - borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) - if not os.path.exists(borg_cache_dir): - os.makedirs(borg_cache_dir) - borg_cache = os.path.join(borg_cache_dir, cache) - if os.path.exists(borg_cache): - print("borg cache already exists in %s, skipping conversion of %s" % (borg_cache, attic_cache)) - else: - print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) - if not dryrun: - shutil.copyfile(attic_cache, borg_cache) - caches += [borg_cache] - else: - print("no %s cache found in %s" % (cache, attic_cache)) + for cache in [ 'files', 'chunks' ]: + copied = copy_cache_file(cache) + if copied: + caches += [copied] for cache in caches: print("converting cache %s" % cache) From 690541264e8beb6f5789c11c1a426ce65a263344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:49:01 -0400 Subject: [PATCH 59/63] style fixes (pep8, append, file builtin) --- borg/converter.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/borg/converter.py 
b/borg/converter.py index 39fe3788a..14aedb9bb 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -10,6 +10,7 @@ from .key import KeyfileKey, KeyfileNotFoundError ATTIC_MAGIC = b'ATTICSEG' + class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -25,7 +26,7 @@ class AtticRepositoryConverter(Repository): print("reading segments from attic repository using borg") # we need to open it to load the configuration and other fields self.open(self.path, exclusive=False) - segments = [ filename for i, filename in self.io.segment_iterator() ] + segments = [filename for i, filename in self.io.segment_iterator()] try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: @@ -121,7 +122,7 @@ class AtticRepositoryConverter(Repository): those are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: - + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` is the `Repository.get_index_transaction_id()`), which we should probably update, with a lock, see @@ -143,28 +144,29 @@ class AtticRepositoryConverter(Repository): # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', - os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + os.path.join(os.path.expanduser('~'), + '.cache', 'attic')) attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii')) borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) - def copy_cache_file(file): - """copy the given attic cache file into the borg directory + def copy_cache_file(path): + """copy the given attic cache path into the borg directory does nothing if dryrun is True. also expects attic_cache_dir and borg_cache_dir to be set in the parent scope, to the directories path including the repository identifier. 
- :params file: the basename of the cache file to copy + :params path: the basename of the cache file to copy (example: "files" or "chunks") as a string :returns: the borg file that was created or None if non was created. """ - attic_file = os.path.join(attic_cache_dir, file) + attic_file = os.path.join(attic_cache_dir, path) if os.path.exists(attic_file): - borg_file = os.path.join(borg_cache_dir, file) + borg_file = os.path.join(borg_cache_dir, path) if os.path.exists(borg_file): print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file)) else: @@ -173,7 +175,7 @@ class AtticRepositoryConverter(Repository): shutil.copyfile(attic_file, borg_file) return borg_file else: - print("no %s cache file found in %s" % (file, attic_file)) + print("no %s cache file found in %s" % (path, attic_file)) return None if os.path.exists(attic_cache_dir): @@ -183,10 +185,10 @@ class AtticRepositoryConverter(Repository): # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks' ]: + for cache in ['files', 'chunks']: copied = copy_cache_file(cache) if copied: - caches += [copied] + caches.append(copied) for cache in caches: print("converting cache %s" % cache) From 48b7c8cea3abe8c0dc8f8cb7d4dd549489659094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:52:12 -0400 Subject: [PATCH 60/63] avoid checking for non-existent files if there's no attic cache, it's no use checking for individual files this also makes the code a little clearer also added comments --- borg/converter.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 14aedb9bb..402ea8b42 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -178,22 +178,26 @@ class AtticRepositoryConverter(Repository): print("no %s cache file found in %s" % (path, attic_file)) return None + # XXX: 
untested, because generating cache files is a PITA, see + # Archiver.do_create() for proof if os.path.exists(attic_cache_dir): if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) + + # non-binary file that we don't need to convert, just copy copy_cache_file('config') - # XXX: untested, because generating cache files is a PITA, see - # Archiver.do_create() for proof - for cache in ['files', 'chunks']: - copied = copy_cache_file(cache) - if copied: - caches.append(copied) + # we need to convert the headers of those files, copy first + for cache in ['files', 'chunks']: + copied = copy_cache_file(cache) + if copied: + caches.append(copied) - for cache in caches: - print("converting cache %s" % cache) - if not dryrun: - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + # actually convert the headers of the detected files + for cache in caches: + print("converting cache %s" % cache) + if not dryrun: + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From c91c5d0029cb364168533d33a6ee28c27b9f1340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:36:52 -0400 Subject: [PATCH 61/63] rename convert command to upgrade convert is too generic for the Attic conversion: we may have other converters, from other, more foreign systems that will require different options and different upgrade mechanisms that convert could never cover appropriately. we are more likely to use an approach similar to "git fast-import" instead here, and have the conversion tools be external tool that feed standard data into borg during conversion. upgrade seems like a more natural fit: Attic could be considered like a pre-historic version of Borg that requires invasive changes for borg to be able to use the repository. 
we may require such changes in the future of borg as well: if we make backwards-incompatible changes to the repository layout or data format, it is possible that we require such changes to be performed on the repository before it is usable again. instead of scattering those conversions all over the code, we should simply have assertions that check the layout is correct and point the user to upgrade if it is not. upgrade should eventually automatically detect the repository format or version and perform appropriate conversions. Attic is only the first one. we still need to implement an adequate API for auto-detection and upgrade, only the seeds of that are present for now. of course, changes to the upgrade command should be thoroughly documented in the release notes and an eventual upgrade manual. --- borg/archiver.py | 39 +++++++++++++------- borg/testsuite/{converter.py => upgrader.py} | 12 +++--- borg/{converter.py => upgrader.py} | 10 ++--- 3 files changed, 36 insertions(+), 25 deletions(-) rename borg/testsuite/{converter.py => upgrader.py} (93%) rename borg/{converter.py => upgrader.py} (96%) diff --git a/borg/archiver.py b/borg/archiver.py index 2f5325257..202ae0ef6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,7 +17,7 @@ import traceback from . 
import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER -from .converter import AtticRepositoryConverter +from .upgrader import AtticRepositoryUpgrader from .repository import Repository from .cache import Cache from .key import key_creator @@ -463,11 +463,20 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) return self.exit_code - def do_convert(self, args): - """convert a repository from attic to borg""" - repo = AtticRepositoryConverter(args.repository.path, create=False) + def do_upgrade(self, args): + """upgrade a repository from a previous version""" + # XXX: currently only upgrades from Attic repositories, but may + # eventually be extended to deal with major upgrades for borg + # itself. + # + # in this case, it should auto-detect the current repository + # format and fire up necessary upgrade mechanism. this remains + # to be implemented. + + # XXX: should auto-detect if it is an attic repository here + repo = AtticRepositoryUpgrader(args.repository.path, create=False) try: - repo.convert(args.dry_run) + repo.upgrade(args.dry_run) except NotImplementedError as e: print("warning: %s" % e) return self.exit_code @@ -906,8 +915,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=location_validator(archive=False), help='repository to prune') - convert_epilog = textwrap.dedent(""" - convert will convert an existing Attic repository to Borg in place. + upgrade_epilog = textwrap.dedent(""" + upgrade an existing Borg repository in place. this currently + only support converting an Attic repository, but may + eventually be extended to cover major Borg upgrades as well. it will change the magic strings in the repository's segments to match the new Borg magic strings. 
the keyfiles found in @@ -928,21 +939,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") repository, in case something goes wrong, for example: cp -a attic borg - borg convert -n borg - borg convert borg + borg upgrade -n borg + borg upgrade borg you have been warned.""") - subparser = subparsers.add_parser('convert', parents=[common_parser], - description=self.do_convert.__doc__, - epilog=convert_epilog, + subparser = subparsers.add_parser('upgrade', parents=[common_parser], + description=self.do_upgrade.__doc__, + epilog=upgrade_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) - subparser.set_defaults(func=self.do_convert) + subparser.set_defaults(func=self.do_upgrade) subparser.add_argument('-n', '--dry-run', dest='dry_run', default=False, action='store_true', help='do not change repository') subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), - help='path to the attic repository to be converted') + help='path to the repository to be upgraded') subparser = subparsers.add_parser('help', parents=[common_parser], description='Extra help') diff --git a/borg/testsuite/converter.py b/borg/testsuite/upgrader.py similarity index 93% rename from borg/testsuite/converter.py rename to borg/testsuite/upgrader.py index b7e3748e9..22278f9ac 100644 --- a/borg/testsuite/converter.py +++ b/borg/testsuite/upgrader.py @@ -11,7 +11,7 @@ try: except ImportError: attic = None -from ..converter import AtticRepositoryConverter, AtticKeyfileKey +from ..upgrader import AtticRepositoryUpgrader, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -78,7 +78,7 @@ def test_convert_segments(tmpdir, attic_repo): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = AtticRepositoryConverter(str(tmpdir), create=False) + repo = 
AtticRepositoryUpgrader(str(tmpdir), create=False) segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) @@ -136,9 +136,9 @@ def test_keys(tmpdir, attic_repo, attic_key_file): define above) :param attic_key_file: an attic.key.KeyfileKey (fixture created above) """ - repository = AtticRepositoryConverter(str(tmpdir), create=False) + repository = AtticRepositoryUpgrader(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) - AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) + AtticRepositoryUpgrader.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) @@ -157,7 +157,7 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = AtticRepositoryConverter(str(tmpdir), create=False) - repo.convert(dryrun=False) + repo = AtticRepositoryUpgrader(str(tmpdir), create=False) + repo.upgrade(dryrun=False) assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) diff --git a/borg/converter.py b/borg/upgrader.py similarity index 96% rename from borg/converter.py rename to borg/upgrader.py index 402ea8b42..2efb9216c 100644 --- a/borg/converter.py +++ b/borg/upgrader.py @@ -11,11 +11,11 @@ from .key import KeyfileKey, KeyfileNotFoundError ATTIC_MAGIC = b'ATTICSEG' -class AtticRepositoryConverter(Repository): - def convert(self, dryrun=True): +class AtticRepositoryUpgrader(Repository): + def upgrade(self, dryrun=True): """convert an attic repository to a borg repository - those are the files that need to be converted here, from most + those are the files that need to be upgraded here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing. 
@@ -62,7 +62,7 @@ class AtticRepositoryConverter(Repository): if dryrun: time.sleep(0.001) else: - AtticRepositoryConverter.header_replace(filename, ATTIC_MAGIC, MAGIC) + AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC) print() @staticmethod @@ -197,7 +197,7 @@ class AtticRepositoryConverter(Repository): for cache in caches: print("converting cache %s" % cache) if not dryrun: - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From fded2219a8c842b56a80926324cbeee8413409f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:46:23 -0400 Subject: [PATCH 62/63] mention borg delete borg this makes it clear how to start from scratch, in case the chunk cache failed to be copied, and so on. --- borg/archiver.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 202ae0ef6..62da098eb 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -931,10 +931,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") the first backup after the conversion takes longer than expected due to the cache resync. - the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic - will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as - the magic strings will have changed. - it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: @@ -942,6 +938,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") borg upgrade -n borg borg upgrade borg + upgrade should be able to resume if interrupted, although it + will still iterate over all segments.
if you want to start + from scratch, use `borg delete` over the copied repository to + make sure the cache files are also removed: + + borg delete borg + + the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic + will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as + the magic strings will have changed. + you have been warned.""") subparser = subparsers.add_parser('upgrade', parents=[common_parser], description=self.do_upgrade.__doc__, From 5409cbaa678eda55c7846726f1146be90ea9b648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:56:03 -0400 Subject: [PATCH 63/63] also copy files cache verbatim it seems the files cache does *not* have the ATTIC magic header (nor does it have one in borg), so we don't need to edit the file - we just copy it like a regular file. while i'm here, simplify the cache conversion loop: it's no use splitting the copy and the editing since the latter is so fast, just do everything in one loop, which makes it much easier to read.
--- borg/upgrader.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/borg/upgrader.py b/borg/upgrader.py index 2efb9216c..33ef2d388 100644 --- a/borg/upgrader.py +++ b/borg/upgrader.py @@ -184,20 +184,17 @@ class AtticRepositoryUpgrader(Repository): if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) - # non-binary file that we don't need to convert, just copy - copy_cache_file('config') + # file that we don't have a header to convert, just copy + for cache in ['config', 'files']: + copy_cache_file(cache) # we need to convert the headers of those files, copy first - for cache in ['files', 'chunks']: + for cache in ['chunks']: copied = copy_cache_file(cache) if copied: - caches.append(copied) - - # actually convert the headers of the detected files - for cache in caches: - print("converting cache %s" % cache) - if not dryrun: - AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + print("converting cache %s" % cache) + if not dryrun: + AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey):