From 8e87f1111b02f35c2ac14c0605b60ccd8db486b8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 6 May 2022 03:59:10 +0200 Subject: [PATCH] cleanup msgpack related str/bytes mess, fixes #968 see ticket and borg.helpers.msgpack docstring. this changeset implements the full migration to msgpack 2.0 spec (use_bin_type=True, raw=False). still needed compat to the past is done via want_bytes decoder in borg.item. --- src/borg/archive.py | 7 +-- src/borg/archiver.py | 2 +- src/borg/crypto/key.py | 10 ++--- src/borg/helpers/msgpack.py | 34 +++++++------- src/borg/item.pyx | 35 +++++++++------ src/borg/remote.py | 77 +++++++++++++++----------------- src/borg/repository.py | 44 +++++++++--------- src/borg/testsuite/archive.py | 14 +++--- src/borg/testsuite/archiver.py | 10 ++--- src/borg/testsuite/key.py | 16 +++---- src/borg/testsuite/repository.py | 4 +- 11 files changed, 124 insertions(+), 129 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index dc244a9f1..ff6ca7ce6 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1718,13 +1718,10 @@ class ArchiveChecker: Iterates through all objects in the repository looking for archive metadata blocks. """ - required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS) - def valid_archive(obj): if not isinstance(obj, dict): return False - keys = set(obj) - return required_archive_keys.issubset(keys) + return REQUIRED_ARCHIVE_KEYS.issubset(obj) logger.info('Rebuilding missing manifest, this might take some time...') # as we have lost the manifest, we do not know any more what valid item keys we had. @@ -1904,7 +1901,7 @@ class ArchiveChecker: def valid_item(obj): if not isinstance(obj, StableDict): return False, 'not a dictionary' - keys = set(k.decode('utf-8', errors='replace') for k in obj) + keys = set(obj) if not required_item_keys.issubset(keys): return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys) if not keys.issubset(item_keys): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index acef0db5e..a91e43c54 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2331,7 +2331,7 @@ class Archiver: unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict) first = True - for item_id in archive_org_dict[b'items']: + for item_id in archive_org_dict['items']: data = key.decrypt(item_id, repository.get(item_id)) unpacker.feed(data) for item in unpacker: diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index 6ca6fbac0..15df53d00 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -232,24 +232,24 @@ class KeyBase: unpacker = get_limited_unpacker('manifest') unpacker.feed(data) unpacked = unpacker.unpack() - if b'tam' not in unpacked: + if 'tam' not in unpacked: if tam_required: raise TAMRequiredError(self.repository._location.canonical_path()) else: logger.debug('TAM not found and not required') return unpacked, False - tam = unpacked.pop(b'tam', None) + tam = unpacked.pop('tam', None) if not isinstance(tam, dict): raise TAMInvalid() - tam_type = tam.get(b'type', b'').decode('ascii', 'replace') + tam_type = tam.get('type', '') if tam_type != 'HKDF_HMAC_SHA512': if tam_required: raise TAMUnsupportedSuiteError(repr(tam_type)) else: logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type) return unpacked, False - tam_hmac = tam.get(b'hmac') - tam_salt = tam.get(b'salt') + tam_hmac = tam.get('hmac') + tam_salt = tam.get('salt') if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes): raise TAMInvalid() offset = data.index(tam_hmac) diff --git a/src/borg/helpers/msgpack.py b/src/borg/helpers/msgpack.py index 5a2edecd6..268ee30e7 100644 --- a/src/borg/helpers/msgpack.py +++ b/src/borg/helpers/msgpack.py @@ -2,8 +2,7 @@ wrapping msgpack ================ -Due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it - -to avoid having lots of clutter in the calling code. see tickets #968 and #3632. +We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code. Packing ------- @@ -22,30 +21,27 @@ Packing Unpacking --------- -- raw = True (the old way, used by borg <= 1.3) - This is currently still needed to not try to decode "raw" msgpack objects. - These could come either from str (new or old msgpack) or bytes (old msgpack). - Thus, we basically must know what we want and either keep the bytes we get - or decode them to str, if we want str. - -- raw = False (the new way) - This can be used in future, when we do not have to deal with data any more that was packed the old way. +- raw = False (used by borg since borg 1.3) + We already can use this with borg 1.3 due to the want_bytes decoder. + This decoder can be removed in future, when we do not have to deal with data any more that was packed the old way. It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str. +- raw = True (the old way, used by borg < 1.3) + - unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False). -As of borg 1.3, we have the first part on the way to fix the msgpack str/bytes mess, #968. -borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely. -But from now on, borg only **writes** new data according to the new msgpack spec, -thus we can complete the fix for #968 in a later borg release. +As of borg 1.3, we have fixed most of the msgpack str/bytes mess, #968. +Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely. +But from now on, borg only **writes** new data according to the new msgpack 2.0 spec, +thus we can remove some legacy support in a later borg release (some places are marked with "legacy"). current way in msgpack terms ---------------------------- - pack with use_bin_type=True (according to msgpack 2.0 spec) - packs str -> raw and bytes -> bin -- unpack with raw=True (aka "the old way") -- unpacks raw to bytes (thus we always need to decode manually if we want str) +- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape') +- unpacks bin to bytes and raw to str (thus we need to re-encode manually if we want bytes from "raw") """ from .datastruct import StableDict @@ -66,8 +62,8 @@ from msgpack import OutOfData version = mp_version USE_BIN_TYPE = True -RAW = True # should become False later when we do not need to read old stuff any more -UNICODE_ERRORS = 'surrogateescape' # previously done by safe_encode, safe_decode +RAW = False +UNICODE_ERRORS = 'surrogateescape' class PackException(Exception): @@ -161,7 +157,7 @@ def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS, def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs): - # assert raw == RAW + assert raw == RAW assert unicode_errors == UNICODE_ERRORS try: kw = dict(raw=raw, unicode_errors=unicode_errors, diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 89f476c1a..4a6c81163 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -60,6 +60,15 @@ def fix_tuple_of_str_and_int(t): return t +def want_bytes(v): + """we know that we want bytes and the value should be bytes""" + # legacy support: it being str can be caused by msgpack unpack decoding old data that was packed with use_bin_type=False + if isinstance(v, str): + v = v.encode('utf-8', errors='surrogateescape') + assert isinstance(v, bytes) + return v + + class PropDict: """ Manage a dictionary via properties. @@ -204,10 +213,10 @@ class Item(PropDict): user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None') group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None') - acl_access = PropDict._make_property('acl_access', bytes) - acl_default = PropDict._make_property('acl_default', bytes) - acl_extended = PropDict._make_property('acl_extended', bytes) - acl_nfs4 = PropDict._make_property('acl_nfs4', bytes) + acl_access = PropDict._make_property('acl_access', bytes, decode=want_bytes) + acl_default = PropDict._make_property('acl_default', bytes, decode=want_bytes) + acl_extended = PropDict._make_property('acl_extended', bytes, decode=want_bytes) + acl_nfs4 = PropDict._make_property('acl_nfs4', bytes, decode=want_bytes) mode = PropDict._make_property('mode', int) uid = PropDict._make_property('uid', int) @@ -224,7 +233,7 @@ class Item(PropDict): # compatibility note: this is a new feature, in old archives size will be missing. size = PropDict._make_property('size', int) - hlid = PropDict._make_property('hlid', bytes) # hard link id: same value means same hard link. + hlid = PropDict._make_property('hlid', bytes, decode=want_bytes) # hard link id: same value means same hard link. hardlink_master = PropDict._make_property('hardlink_master', bool) # legacy chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None') @@ -363,9 +372,9 @@ class EncryptedKey(PropDict): version = PropDict._make_property('version', int) algorithm = PropDict._make_property('algorithm', str) iterations = PropDict._make_property('iterations', int) - salt = PropDict._make_property('salt', bytes) - hash = PropDict._make_property('hash', bytes) - data = PropDict._make_property('data', bytes) + salt = PropDict._make_property('salt', bytes, decode=want_bytes) + hash = PropDict._make_property('hash', bytes, decode=want_bytes) + data = PropDict._make_property('data', bytes, decode=want_bytes) argon2_time_cost = PropDict._make_property('argon2_time_cost', int) argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int) argon2_parallelism = PropDict._make_property('argon2_parallelism', int) @@ -399,10 +408,10 @@ class Key(PropDict): __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - repository_id = PropDict._make_property('repository_id', bytes) - enc_key = PropDict._make_property('enc_key', bytes) - enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes) - id_key = PropDict._make_property('id_key', bytes) + repository_id = PropDict._make_property('repository_id', bytes, decode=want_bytes) + enc_key = PropDict._make_property('enc_key', bytes, decode=want_bytes) + enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes, decode=want_bytes) + id_key = PropDict._make_property('id_key', bytes, decode=want_bytes) chunk_seed = PropDict._make_property('chunk_seed', int) tam_required = PropDict._make_property('tam_required', bool) @@ -443,7 +452,7 @@ class ArchiveItem(PropDict): chunker_params = PropDict._make_property('chunker_params', tuple) recreate_cmdline = PropDict._make_property('recreate_cmdline', list) # list of s-e-str # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2 - recreate_source_id = PropDict._make_property('recreate_source_id', bytes) + recreate_source_id = PropDict._make_property('recreate_source_id', bytes, decode=want_bytes) recreate_args = PropDict._make_property('recreate_args', list) # list of s-e-str recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list) # list of tuples size = PropDict._make_property('size', int) diff --git a/src/borg/remote.py b/src/borg/remote.py index 8de302871..6ea51d3c3 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -38,8 +38,7 @@ logger = create_logger(__name__) RPC_PROTOCOL_VERSION = 2 BORG_VERSION = parse_version(__version__) -MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r' # pack -MSGIDB, MSGB, ARGSB, RESULTB = b'i', b'm', b'a', b'r' # unpack +MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r' MAX_INFLIGHT = 100 @@ -139,10 +138,6 @@ compatMap = { } -def decode_keys(d): - return {k.decode(): d[k] for k in d} - - class RepositoryServer: # pragma: no cover rpc_methods = ( '__len__', @@ -217,14 +212,13 @@ class RepositoryServer: # pragma: no cover for unpacked in unpacker: if isinstance(unpacked, dict): dictFormat = True - msgid = unpacked[MSGIDB] - method = unpacked[MSGB].decode() - args = decode_keys(unpacked[ARGSB]) + msgid = unpacked[MSGID] + method = unpacked[MSG] + args = unpacked[ARGS] elif isinstance(unpacked, tuple) and len(unpacked) == 4: dictFormat = False # The first field 'type' was always 1 and has always been ignored _, msgid, method, args = unpacked - method = method.decode() args = self.positional_to_named(method, args) else: if self.repository is not None: @@ -308,7 +302,7 @@ class RepositoryServer: # pragma: no cover # clients since 1.1.0b3 use a dict as client_data # clients since 1.1.0b6 support json log format from server if isinstance(client_data, dict): - self.client_version = client_data[b'client_version'] + self.client_version = client_data['client_version'] level = logging.getLevelName(logging.getLogger('').level) setup_logging(is_serve=True, json=True, level=level) logger.debug('Initialized logging system for JSON-based protocol') @@ -370,7 +364,6 @@ class RepositoryServer: # pragma: no cover return self.repository.id def inject_exception(self, kind): - kind = kind.decode() s1 = 'test string' s2 = 'test string2' if kind == 'DoesNotExist': @@ -484,35 +477,35 @@ class RemoteRepository: class RPCError(Exception): def __init__(self, unpacked): - # for borg < 1.1: unpacked only has b'exception_class' as key - # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo' + # for borg < 1.1: unpacked only has 'exception_class' as key + # for borg 1.1+: unpacked has keys: 'exception_args', 'exception_full', 'exception_short', 'sysinfo' self.unpacked = unpacked def get_message(self): - if b'exception_short' in self.unpacked: - return b'\n'.join(self.unpacked[b'exception_short']).decode() + if 'exception_short' in self.unpacked: + return '\n'.join(self.unpacked['exception_short']) else: return self.exception_class @property def traceback(self): - return self.unpacked.get(b'exception_trace', True) + return self.unpacked.get('exception_trace', True) @property def exception_class(self): - return self.unpacked[b'exception_class'].decode() + return self.unpacked['exception_class'] @property def exception_full(self): - if b'exception_full' in self.unpacked: - return b'\n'.join(self.unpacked[b'exception_full']).decode() + if 'exception_full' in self.unpacked: + return '\n'.join(self.unpacked['exception_full']) else: return self.get_message() + '\nRemote Exception (see remote log for the traceback)' @property def sysinfo(self): - if b'sysinfo' in self.unpacked: - return self.unpacked[b'sysinfo'].decode() + if 'sysinfo' in self.unpacked: + return self.unpacked['sysinfo'] else: return '' @@ -577,9 +570,9 @@ class RemoteRepository: raise ConnectionClosedWithHint('Is borg working on the server?') from None if version == RPC_PROTOCOL_VERSION: self.dictFormat = False - elif isinstance(version, dict) and b'server_version' in version: + elif isinstance(version, dict) and 'server_version' in version: self.dictFormat = True - self.server_version = version[b'server_version'] + self.server_version = version['server_version'] else: raise Exception('Server insisted on using unsupported protocol version %s' % version) @@ -734,9 +727,9 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. return msgid def handle_error(unpacked): - error = unpacked[b'exception_class'].decode() - old_server = b'exception_args' not in unpacked - args = unpacked.get(b'exception_args') + error = unpacked['exception_class'] + old_server = 'exception_args' not in unpacked + args = unpacked.get('exception_args') if error == 'DoesNotExist': raise Repository.DoesNotExist(self.location.processed) @@ -748,29 +741,29 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. if old_server: raise IntegrityError('(not available)') else: - raise IntegrityError(args[0].decode()) + raise IntegrityError(args[0]) elif error == 'AtticRepository': if old_server: raise Repository.AtticRepository('(not available)') else: - raise Repository.AtticRepository(args[0].decode()) + raise Repository.AtticRepository(args[0]) elif error == 'PathNotAllowed': if old_server: raise PathNotAllowed('(unknown)') else: - raise PathNotAllowed(args[0].decode()) + raise PathNotAllowed(args[0]) elif error == 'ParentPathDoesNotExist': - raise Repository.ParentPathDoesNotExist(args[0].decode()) + raise Repository.ParentPathDoesNotExist(args[0]) elif error == 'ObjectNotFound': if old_server: raise Repository.ObjectNotFound('(not available)', self.location.processed) else: - raise Repository.ObjectNotFound(args[0].decode(), self.location.processed) + raise Repository.ObjectNotFound(args[0], self.location.processed) elif error == 'InvalidRPCMethod': if old_server: raise InvalidRPCMethod('(not available)') else: - raise InvalidRPCMethod(args[0].decode()) + raise InvalidRPCMethod(args[0]) else: raise self.RPCError(unpacked) @@ -789,10 +782,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. try: unpacked = self.responses.pop(waiting_for[0]) waiting_for.pop(0) - if b'exception_class' in unpacked: + if 'exception_class' in unpacked: handle_error(unpacked) else: - yield unpacked[RESULTB] + yield unpacked[RESULT] if not waiting_for and not calls: return except KeyError: @@ -809,10 +802,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. else: return else: - if b'exception_class' in unpacked: + if 'exception_class' in unpacked: handle_error(unpacked) else: - yield unpacked[RESULTB] + yield unpacked[RESULT] if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT): w_fds = [self.stdin_fd] else: @@ -829,26 +822,26 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. self.unpacker.feed(data) for unpacked in self.unpacker: if isinstance(unpacked, dict): - msgid = unpacked[MSGIDB] + msgid = unpacked[MSGID] elif isinstance(unpacked, tuple) and len(unpacked) == 4: # The first field 'type' was always 1 and has always been ignored _, msgid, error, res = unpacked if error: # ignore res, because it is only a fixed string anyway. - unpacked = {MSGIDB: msgid, b'exception_class': error} + unpacked = {MSGID: msgid, 'exception_class': error} else: - unpacked = {MSGIDB: msgid, RESULTB: res} + unpacked = {MSGID: msgid, RESULT: res} else: raise UnexpectedRPCDataFormatFromServer(data) if msgid in self.ignore_responses: self.ignore_responses.remove(msgid) # async methods never return values, but may raise exceptions. - if b'exception_class' in unpacked: + if 'exception_class' in unpacked: self.async_responses[msgid] = unpacked else: # we currently do not have async result values except "None", # so we do not add them into async_responses. - if unpacked[RESULTB] is not None: + if unpacked[RESULT] is not None: self.async_responses[msgid] = unpacked else: self.responses[msgid] = unpacked diff --git a/src/borg/repository.py b/src/borg/repository.py index 9267fe0e6..3fcc72aad 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -516,16 +516,16 @@ class Repository: integrity = msgpack.unpack(fd) except FileNotFoundError: return - if integrity.get(b'version') != 2: - logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file) + if integrity.get('version') != 2: + logger.warning('Unknown integrity data version %r in %s', integrity.get('version'), integrity_file) return - return integrity[key].decode() + return integrity[key] def open_index(self, transaction_id, auto_recover=True): if transaction_id is None: return NSIndex() index_path = os.path.join(self.path, 'index.%d' % transaction_id) - integrity_data = self._read_integrity(transaction_id, b'index') + integrity_data = self._read_integrity(transaction_id, 'index') try: with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd: return NSIndex.read(fd) @@ -575,7 +575,7 @@ class Repository: self.io.cleanup(transaction_id) hints_path = os.path.join(self.path, 'hints.%d' % transaction_id) index_path = os.path.join(self.path, 'index.%d' % transaction_id) - integrity_data = self._read_integrity(transaction_id, b'hints') + integrity_data = self._read_integrity(transaction_id, 'hints') try: with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd: hints = msgpack.unpack(fd) @@ -588,23 +588,23 @@ class Repository: self.check_transaction() self.prepare_txn(transaction_id) return - if hints[b'version'] == 1: + if hints['version'] == 1: logger.debug('Upgrading from v1 hints.%d', transaction_id) - self.segments = hints[b'segments'] + self.segments = hints['segments'] self.compact = FreeSpace() self.storage_quota_use = 0 self.shadow_index = {} - for segment in sorted(hints[b'compact']): + for segment in sorted(hints['compact']): logger.debug('Rebuilding sparse info for segment %d', segment) self._rebuild_sparse(segment) logger.debug('Upgrade to v2 hints complete') - elif hints[b'version'] != 2: - raise ValueError('Unknown hints file version: %d' % hints[b'version']) + elif hints['version'] != 2: + raise ValueError('Unknown hints file version: %d' % hints['version']) else: - self.segments = hints[b'segments'] - self.compact = FreeSpace(hints[b'compact']) - self.storage_quota_use = hints.get(b'storage_quota_use', 0) - self.shadow_index = hints.get(b'shadow_index', {}) + self.segments = hints['segments'] + self.compact = FreeSpace(hints['compact']) + self.storage_quota_use = hints.get('storage_quota_use', 0) + self.shadow_index = hints.get('shadow_index', {}) self.log_storage_quota() # Drop uncommitted segments in the shadow index for key, shadowed_segments in self.shadow_index.items(): @@ -621,16 +621,16 @@ class Repository: os.rename(file + '.tmp', file) hints = { - b'version': 2, - b'segments': self.segments, - b'compact': self.compact, - b'storage_quota_use': self.storage_quota_use, - b'shadow_index': self.shadow_index, + 'version': 2, + 'segments': self.segments, + 'compact': self.compact, + 'storage_quota_use': self.storage_quota_use, + 'shadow_index': self.shadow_index, } integrity = { # Integrity version started at 2, the current hints version. # Thus, integrity version == hints version, for now. - b'version': 2, + 'version': 2, } transaction_id = self.io.get_segments_transaction_id() assert transaction_id is not None @@ -647,7 +647,7 @@ class Repository: with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd: msgpack.pack(hints, fd) flush_and_sync(fd) - integrity[b'hints'] = fd.integrity_data + integrity['hints'] = fd.integrity_data # Write repository index index_name = 'index.%d' % transaction_id @@ -656,7 +656,7 @@ class Repository: # XXX: Consider using SyncFile for index write-outs. self.index.write(fd) flush_and_sync(fd) - integrity[b'index'] = fd.integrity_data + integrity['index'] = fd.integrity_data # Write integrity file, containing checksums of the hints and index files integrity_name = 'integrity.%d' % transaction_id diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 0eed9f7e8..9cdcf5046 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -171,7 +171,7 @@ class RobustUnpackerTestCase(BaseTestCase): return b''.join(msgpack.packb({'path': item}) for item in items) def _validator(self, value): - return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz') + return isinstance(value, dict) and value.get('path') in ('foo', 'bar', 'boo', 'baz') def process(self, input): unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS) @@ -190,10 +190,10 @@ class RobustUnpackerTestCase(BaseTestCase): (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])] result = self.process(chunks) self.assert_equal(result, [ - {b'path': b'foo'}, {b'path': b'bar'}, + {'path': 'foo'}, {'path': 'bar'}, 103, 97, 114, 98, 97, 103, 101, - {b'path': b'boo'}, - {b'path': b'baz'}]) + {'path': 'boo'}, + {'path': 'baz'}]) def split(self, left, length): parts = [] @@ -206,19 +206,19 @@ class RobustUnpackerTestCase(BaseTestCase): chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2) input = [(False, chunks)] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': 'foo'}, {'path': 'bar'}, {'path': 'boo'}, {'path': 'baz'}]) def test_missing_chunk(self): chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4) input = [(False, chunks[:3]), (True, chunks[4:])] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}]) def test_corrupt_chunk(self): chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4) input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}]) @pytest.fixture diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index f3315b676..a4205ea76 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -3623,14 +3623,14 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 self.cmd('init', '--encryption=repokey', self.repository_location) with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == b'argon2 chacha20-poly1305' + assert key['algorithm'] == 'argon2 chacha20-poly1305' def test_init_with_explicit_key_algorithm(self): """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401""" self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location) with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == b'sha256' + assert key['algorithm'] == 'sha256' def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm): self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location) @@ -3640,7 +3640,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == expected_algorithm.encode() + assert key['algorithm'] == expected_algorithm def test_change_passphrase_does_not_change_algorithm_argon2(self): self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305') @@ -3655,7 +3655,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02 with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == expected_algorithm.encode() + assert key['algorithm'] == expected_algorithm def test_change_location_does_not_change_algorithm_argon2(self): self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305') @@ -3969,7 +3969,7 @@ class ManifestAuthenticationTest(ArchiverTestCaseBase): key.change_passphrase(key._passphrase) manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID))) - del manifest[b'tam'] + del manifest['tam'] repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest))) repository.commit(compact=False) output = self.cmd('list', '--debug', self.repository_location) diff --git a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py index 5073c5b23..02eaa86e5 100644 --- a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -360,23 +360,23 @@ class TestTAM: assert blob.startswith(b'\x82') unpacked = msgpack.unpackb(blob) - assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512' + assert unpacked['tam']['type'] == 'HKDF_HMAC_SHA512' unpacked, verified = key.unpack_and_verify_manifest(blob) assert verified - assert unpacked[b'foo'] == b'bar' - assert b'tam' not in unpacked + assert unpacked['foo'] == 'bar' + assert 'tam' not in unpacked - @pytest.mark.parametrize('which', (b'hmac', b'salt')) + @pytest.mark.parametrize('which', ('hmac', 'salt')) def test_tampered(self, key, which): data = {'foo': 'bar'} blob = key.pack_and_authenticate_metadata(data) assert blob.startswith(b'\x82') unpacked = msgpack.unpackb(blob, object_hook=StableDict) - assert len(unpacked[b'tam'][which]) == 64 - unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32) - assert len(unpacked[b'tam'][which]) == 64 + assert len(unpacked['tam'][which]) == 64 + unpacked['tam'][which] = unpacked['tam'][which][0:32] + bytes(32) + assert len(unpacked['tam'][which]) == 64 blob = msgpack.packb(unpacked) with pytest.raises(TAMInvalid): @@ -421,4 +421,4 @@ def test_key_file_roundtrip(monkeypatch, cli_argument, expected_algorithm): load_me = RepoKey.detect(repository, manifest_data=None) assert to_dict(load_me) == to_dict(save_me) - assert msgpack.unpackb(a2b_base64(saved))[b'algorithm'] == expected_algorithm.encode() + assert msgpack.unpackb(a2b_base64(saved))['algorithm'] == expected_algorithm diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index b4944e58a..52f03e668 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -655,8 +655,8 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): hints = msgpack.unpack(fd) fd.seek(0) # Corrupt segment refcount - assert hints[b'segments'][2] == 1 - hints[b'segments'][2] = 0 + assert hints['segments'][2] == 1 + hints['segments'][2] = 0 msgpack.pack(hints, fd) fd.truncate()