From c515d6018ddbb73e8b35029c25f707c9e4f66f82 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Thu, 30 Jun 2016 11:59:12 -0400 Subject: [PATCH 01/20] Add --append-only to borg serve Fixes #1168 --- borg/archiver.py | 7 +++++-- borg/remote.py | 5 +++-- borg/repository.py | 7 +++++-- borg/testsuite/repository.py | 5 ++++- docs/usage.rst | 7 +++++++ 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8aff7c2fc..d8f8958e5 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -115,7 +115,7 @@ class Archiver: def do_serve(self, args): """Start in server mode. This command is usually not used manually. """ - return RepositoryServer(restrict_to_paths=args.restrict_to_paths).serve() + return RepositoryServer(restrict_to_paths=args.restrict_to_paths, append_only=args.append_only).serve() @with_repository(create=True, exclusive=True, manifest=False) def do_init(self, args, repository): @@ -916,6 +916,8 @@ class Archiver: subparser.set_defaults(func=self.do_serve) subparser.add_argument('--restrict-to-path', dest='restrict_to_paths', action='append', metavar='PATH', help='restrict repository access to PATH') + subparser.add_argument('--append-only', dest='append_only', action='store_true', + help='only allow appending to repository segment files') init_epilog = textwrap.dedent(""" This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. @@ -1491,8 +1493,9 @@ class Archiver: if result.func != forced_result.func: # someone is trying to execute a different borg subcommand, don't do that! return forced_result - # the only thing we take from the forced "borg serve" ssh command is --restrict-to-path + # we only take specific options from the forced "borg serve" command: result.restrict_to_paths = forced_result.restrict_to_paths + result.append_only = forced_result.append_only return result def parse_args(self, args=None): diff --git a/borg/remote.py b/borg/remote.py index e95c38978..85012382a 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -54,9 +54,10 @@ class RepositoryServer: # pragma: no cover 'break_lock', ) - def __init__(self, restrict_to_paths): + def __init__(self, restrict_to_paths, append_only): self.repository = None self.restrict_to_paths = restrict_to_paths + self.append_only = append_only def serve(self): stdin_fd = sys.stdin.fileno() @@ -123,7 +124,7 @@ class RepositoryServer: # pragma: no cover break else: raise PathNotAllowed(path) - self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock) + self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock, append_only=self.append_only) self.repository.__enter__() # clean exit handled by serve() method return self.repository.id diff --git a/borg/repository.py b/borg/repository.py index 25f9ccc19..239fd1965 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -53,7 +53,7 @@ class Repository: class ObjectNotFound(ErrorWithTraceback): """Object with key {} not found in repository {}.""" - def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True): + def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False): self.path = os.path.abspath(path) self._location = Location('file://%s' % self.path) self.io = None @@ -64,6 +64,7 @@ class Repository: self.do_lock = lock self.do_create = create self.exclusive = exclusive + self.append_only = append_only def __del__(self): if self.lock: @@ -169,7 +170,9 @@ class Repository: raise self.InvalidRepository(path) self.max_segment_size = self.config.getint('repository', 'max_segment_size') self.segments_per_dir = self.config.getint('repository', 'segments_per_dir') - self.append_only = self.config.getboolean('repository', 'append_only', fallback=False) + # append_only can be set in the constructor + # it shouldn't be overridden (True -> False) here + self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False) self.id = unhexlify(self.config.get('repository', 'id').strip()) self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir) diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 0f384380a..b72e80414 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -201,11 +201,14 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): + def open(self, create=False): + return Repository(os.path.join(self.tmppath, 'repository'), create=create, append_only=True) + def test_destroy_append_only(self): # Can't destroy append only repo (via the API) - self.repository.append_only = True with self.assert_raises(ValueError): self.repository.destroy() + assert self.repository.append_only def test_append_only(self): def segments_in_repository(): diff --git a/docs/usage.rst b/docs/usage.rst index b7863d9a9..e1a30060b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -727,6 +727,13 @@ To activate append-only mode, edit the repository ``config`` file and add a line In append-only mode Borg will create a transaction log in the ``transactions`` file, where each line is a transaction and a UTC timestamp. +In addition, ``borg serve`` can act as if a repository is in append-only mode with +its option ``--append-only``. This can be very useful for fine-tuning access control +in ``.ssh/authorized_keys`` :: + + command="borg serve --append-only ..." ssh-rsa + command="borg serve ..." ssh-rsa + Example +++++++ From 58e42deaa0e2506533cea1a02e4e2c7a771d9314 Mon Sep 17 00:00:00 2001 From: Lee Bousfield Date: Thu, 30 Jun 2016 18:00:39 -0400 Subject: [PATCH 02/20] Add .eggs to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2d77951bd..cfb12d0a6 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ borg.dist/ borg.exe .coverage .vagrant +.eggs From 5e260fdfda75285fde3642e462a935145faebf3f Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 1 Jul 2016 00:13:53 +0200 Subject: [PATCH 03/20] Rename input_io*() -> backup_io*() --- borg/archive.py | 16 ++++++++-------- borg/archiver.py | 6 +++--- borg/testsuite/archive.py | 12 ++++++------ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 870069953..eba4bb7f1 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -46,7 +46,7 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0) flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) -class InputOSError(Exception): +class BackupOSError(Exception): """Wrapper for OSError raised while accessing input files.""" def __init__(self, os_error): self.os_error = os_error @@ -59,18 +59,18 @@ class InputOSError(Exception): @contextmanager -def input_io(): - """Context manager changing OSError to InputOSError.""" +def backup_io(): + """Context manager changing OSError to BackupOSError.""" try: yield except OSError as os_error: - raise InputOSError(os_error) from os_error + raise BackupOSError(os_error) from os_error def input_io_iter(iterator): while True: try: - with input_io(): + with backup_io(): item = next(iterator) except StopIteration: return @@ -496,13 +496,13 @@ Number of files: {0.stats.nfiles}'''.format( } if self.numeric_owner: item[b'user'] = item[b'group'] = None - with input_io(): + with backup_io(): xattrs = xattr.get_all(path, follow_symlinks=False) if xattrs: item[b'xattrs'] = StableDict(xattrs) if has_lchflags and st.st_flags: item[b'bsdflags'] = st.st_flags - with input_io(): + with backup_io(): acl_get(path, item, st, self.numeric_owner) return item @@ -586,7 +586,7 @@ Number of files: {0.stats.nfiles}'''.format( item = {b'path': safe_path} # Only chunkify the file if needed if chunks is None: - with input_io(): + with backup_io(): fh = Archive._open_rb(path) with os.fdopen(fh, 'rb') as fd: chunks = [] diff --git a/borg/archiver.py b/borg/archiver.py index 8aff7c2fc..b29ff5af0 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -29,7 +29,7 @@ from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader from .repository import Repository from .cache import Cache from .key import key_creator, RepoKey, PassphraseKey -from .archive import input_io, InputOSError, Archive, ArchiveChecker, CHUNKER_PARAMS +from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS from .remote import RepositoryServer, RemoteRepository, cache_if_remote has_lchflags = hasattr(os, 'lchflags') @@ -198,7 +198,7 @@ class Archiver: if not dry_run: try: status = archive.process_stdin(path, cache) - except InputOSError as e: + except BackupOSError as e: status = 'E' self.print_warning('%s: %s', path, e) else: @@ -281,7 +281,7 @@ class Archiver: if not dry_run: try: status = archive.process_file(path, st, cache, self.ignore_inode) - except InputOSError as e: + except BackupOSError as e: status = 'E' self.print_warning('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 229ff8bda..2b3961be5 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -5,7 +5,7 @@ import msgpack import pytest from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS -from ..archive import InputOSError, input_io, input_io_iter +from ..archive import BackupOSError, backup_io, input_io_iter from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -148,13 +148,13 @@ def test_key_length_msgpacked_items(): assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized) -def test_input_io(): - with pytest.raises(InputOSError): - with input_io(): +def test_backup_io(): + with pytest.raises(BackupOSError): + with backup_io(): raise OSError(123) -def test_input_io_iter(): +def test_backup_io_iter(): class Iterator: def __init__(self, exc): self.exc = exc @@ -163,7 +163,7 @@ def test_input_io_iter(): raise self.exc() oserror_iterator = Iterator(OSError) - with pytest.raises(InputOSError): + with pytest.raises(BackupOSError): for _ in input_io_iter(oserror_iterator): pass From b241f95a4b1c5f9a3b4001fd87c0339bd4152f66 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 1 Jul 2016 00:14:10 +0200 Subject: [PATCH 04/20] Implement fail-safe error handling for borg-extract Note that this isn't nearly as critical as the other error handling bug, since nothing is written. So it's "merely" misleading error reporting. --- borg/archive.py | 92 ++++++++++++++++++++++++++++-------------------- borg/archiver.py | 2 +- 2 files changed, 54 insertions(+), 40 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index eba4bb7f1..bf65d49ee 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -356,54 +356,68 @@ Number of files: {0.stats.nfiles}'''.format( mode = item[b'mode'] if stat.S_ISREG(mode): if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) + with backup_io(): + os.makedirs(os.path.dirname(path)) # Hard link? if b'source' in item: source = os.path.join(dest, item[b'source']) - if os.path.exists(path): - os.unlink(path) - os.link(source, path) + with backup_io(): + if os.path.exists(path): + os.unlink(path) + os.link(source, path) else: - with open(path, 'wb') as fd: + with backup_io(): + fd = open(path, 'wb') + with fd: ids = [c[0] for c in item[b'chunks']] for data in self.pipeline.fetch_many(ids, is_preloaded=True): - if sparse and self.zeros.startswith(data): - # all-zero chunk: create a hole in a sparse file - fd.seek(len(data), 1) - else: - fd.write(data) - pos = fd.tell() - fd.truncate(pos) - fd.flush() - self.restore_attrs(path, item, fd=fd.fileno()) - elif stat.S_ISDIR(mode): - if not os.path.exists(path): - os.makedirs(path) - if restore_attrs: + with backup_io(): + if sparse and self.zeros.startswith(data): + # all-zero chunk: create a hole in a sparse file + fd.seek(len(data), 1) + else: + fd.write(data) + with backup_io(): + pos = fd.tell() + fd.truncate(pos) + fd.flush() + self.restore_attrs(path, item, fd=fd.fileno()) + return + with backup_io(): + # No repository access beyond this point. + if stat.S_ISDIR(mode): + if not os.path.exists(path): + os.makedirs(path) + if restore_attrs: + self.restore_attrs(path, item) + elif stat.S_ISLNK(mode): + if not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + source = item[b'source'] + if os.path.exists(path): + os.unlink(path) + try: + os.symlink(source, path) + except UnicodeEncodeError: + raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None + self.restore_attrs(path, item, symlink=True) + elif stat.S_ISFIFO(mode): + if not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + os.mkfifo(path) self.restore_attrs(path, item) - elif stat.S_ISLNK(mode): - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) - source = item[b'source'] - if os.path.exists(path): - os.unlink(path) - try: - os.symlink(source, path) - except UnicodeEncodeError: - raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None - self.restore_attrs(path, item, symlink=True) - elif stat.S_ISFIFO(mode): - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) - os.mkfifo(path) - self.restore_attrs(path, item) - elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): - os.mknod(path, item[b'mode'], item[b'rdev']) - self.restore_attrs(path, item) - else: - raise Exception('Unknown archive item type %r' % item[b'mode']) + elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): + os.mknod(path, item[b'mode'], item[b'rdev']) + self.restore_attrs(path, item) + else: + raise Exception('Unknown archive item type %r' % item[b'mode']) def restore_attrs(self, path, item, symlink=False, fd=None): + """ + Restore filesystem attributes on *path* from *item* (*fd*). + + Does not access the repository. + """ uid = gid = None if not self.numeric_owner: uid = user2uid(item[b'user']) diff --git a/borg/archiver.py b/borg/archiver.py index b29ff5af0..ac4ad9a41 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -384,7 +384,7 @@ class Archiver: archive.extract_item(item, restore_attrs=False) else: archive.extract_item(item, stdout=stdout, sparse=sparse) - except OSError as e: + except BackupOSError as e: self.print_warning('%s: %s', remove_surrogates(orig_path), e) if not args.dry_run: From e365d64718f5304c3e2561d86ab277019a7ca3c7 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 1 Jul 2016 00:06:21 +0200 Subject: [PATCH 05/20] Add missing error handler in directory attr restore loop (1/2) --- borg/archiver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index ac4ad9a41..594bc6162 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -389,7 +389,11 @@ class Archiver: if not args.dry_run: while dirs: - archive.extract_item(dirs.pop(-1)) + dir_item = dirs.pop(-1) + try: + archive.extract_item(dir_item) + except BackupOSError as e: + self.print_warning('%s: %s', remove_surrogates(dir_item[b'path']), e) for pattern in include_patterns: if pattern.match_count == 0: self.print_warning("Include pattern '%s' never matched.", pattern) From 26bf500566ff834953ed364a112d72f4c5b259af Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 1 Jul 2016 00:07:38 +0200 Subject: [PATCH 06/20] Add missing error handler in directory attr restore loop (2/2) --- borg/archiver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 594bc6162..5f01a39fa 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -372,7 +372,11 @@ class Archiver: continue if not args.dry_run: while dirs and not item[b'path'].startswith(dirs[-1][b'path']): - archive.extract_item(dirs.pop(-1), stdout=stdout) + dir_item = dirs.pop(-1) + try: + archive.extract_item(dir_item, stdout=stdout) + except BackupOSError as e: + self.print_warning('%s: %s', remove_surrogates(dir_item[b'path']), e) if output_list: logger.info(remove_surrogates(orig_path)) try: From fde5a60549a735adecd5a3297d077b5deb3694ad Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 1 Jul 2016 01:11:12 +0200 Subject: [PATCH 07/20] make sure data hits disk before commit tag, fixes #1236 --- borg/repository.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/borg/repository.py b/borg/repository.py index 239fd1965..67e4b1f27 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -727,8 +727,11 @@ class LoggedIO: fd = self.get_write_fd(no_new=True) header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT) crc = self.crc_fmt.pack(crc32(header) & 0xffffffff) + # first fsync(fd) here (to ensure data supposedly hits the disk before the commit tag) + fd.flush() + os.fsync(fd.fileno()) fd.write(b''.join((crc, header))) - self.close_segment() + self.close_segment() # after-commit fsync() def close_segment(self): if self._write_fd: From 99566a31c01a1ec9850816a983d96c1f9c227d26 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 1 Jul 2016 02:07:30 +0200 Subject: [PATCH 08/20] sync the containing directory also --- borg/platform.py | 11 +++++++++++ borg/repository.py | 3 +++ 2 files changed, 14 insertions(+) diff --git a/borg/platform.py b/borg/platform.py index 1bc8ee5e4..d6c2e55d8 100644 --- a/borg/platform.py +++ b/borg/platform.py @@ -1,5 +1,16 @@ +import os import sys + +# POSIX-only, from borg 1.1 platform.base +def sync_dir(path): + fd = os.open(path, os.O_RDONLY) + try: + os.fsync(fd) + finally: + os.close(fd) + + if sys.platform.startswith('linux'): # pragma: linux only from .platform_linux import acl_get, acl_set, API_VERSION elif sys.platform.startswith('freebsd'): # pragma: freebsd only diff --git a/borg/repository.py b/borg/repository.py index 67e4b1f27..275802751 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -16,6 +16,7 @@ from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, Progre from .hashindex import NSIndex from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache +from .platform import sync_dir MAX_OBJECT_SIZE = 20 * 1024 * 1024 MAGIC = b'BORG_SEG' @@ -600,6 +601,7 @@ class LoggedIO: dirname = os.path.join(self.path, 'data', str(self.segment // self.segments_per_dir)) if not os.path.exists(dirname): os.mkdir(dirname) + sync_dir(os.path.join(self.path, 'data')) self._write_fd = open(self.segment_filename(self.segment), 'ab') self._write_fd.write(MAGIC) self.offset = MAGIC_LEN @@ -744,4 +746,5 @@ class LoggedIO: # avoids spoiling the cache for the OS and other processes. os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) self._write_fd.close() + sync_dir(os.path.dirname(self._write_fd.name)) self._write_fd = None From 9725c03299fca8c02c07295886a3ff400d4b565a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 3 Jul 2016 02:58:17 +0200 Subject: [PATCH 09/20] close the repo on exit - even if rollback did not work, fixes #1197 --- borg/remote.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index 85012382a..26dce53d4 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -189,9 +189,14 @@ class RemoteRepository: return self def __exit__(self, exc_type, exc_val, exc_tb): - if exc_type is not None: - self.rollback() - self.close() + try: + if exc_type is not None: + self.rollback() + finally: + # in any case, we want to cleanly close the repo, even if the + # rollback can not succeed (e.g. because the connection was + # already closed) and raised another exception: + self.close() def borg_cmd(self, args, testing): """return a borg serve command line""" From f6deb09184bb8e38bec75eec9e0262be43ceed70 Mon Sep 17 00:00:00 2001 From: Joachim Breitner Date: Sun, 3 Jul 2016 16:27:02 +0200 Subject: [PATCH 10/20] Documentation: Explicate that Fnmatch is default This fixes #1247. It also regenerates the usage documentation, so that styling fixex in that section (as well as other existing changes) make it into the files in docs/. --- borg/archiver.py | 35 +++++------ docs/usage/create.rst.inc | 1 + docs/usage/help.rst.inc | 119 +++++++++++++++++++++++++------------- docs/usage/prune.rst.inc | 4 +- docs/usage/serve.rst.inc | 2 + 5 files changed, 102 insertions(+), 59 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index d8f8958e5..513f5f874 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -726,26 +726,27 @@ class Archiver: helptext = {} helptext['patterns'] = textwrap.dedent(''' Exclusion patterns support four separate styles, fnmatch, shell, regular - expressions and path prefixes. If followed by a colon (':') the first two - characters of a pattern are used as a style selector. Explicit style - selection is necessary when a non-default style is desired or when the - desired pattern starts with two alphanumeric characters followed by a colon - (i.e. `aa:something/*`). + expressions and path prefixes. By default, fnmatch is used. If followed + by a colon (':') the first two characters of a pattern are used as a + style selector. Explicit style selection is necessary when a + non-default style is desired or when the desired pattern starts with + two alphanumeric characters followed by a colon (i.e. `aa:something/*`). `Fnmatch `_, selector `fm:` - These patterns use a variant of shell pattern syntax, with '*' matching - any number of characters, '?' matching any single character, '[...]' - matching any single character specified, including ranges, and '[!...]' - matching any character not specified. For the purpose of these patterns, - the path separator ('\\' for Windows and '/' on other systems) is not - treated specially. Wrap meta-characters in brackets for a literal match - (i.e. `[?]` to match the literal character `?`). For a path to match - a pattern, it must completely match from start to end, or must match from - the start to just before a path separator. Except for the root path, - paths will never end in the path separator when matching is attempted. - Thus, if a given pattern ends in a path separator, a '*' is appended - before matching is attempted. + This is the default style. These patterns use a variant of shell + pattern syntax, with '*' matching any number of characters, '?' + matching any single character, '[...]' matching any single + character specified, including ranges, and '[!...]' matching any + character not specified. For the purpose of these patterns, the + path separator ('\\' for Windows and '/' on other systems) is not + treated specially. Wrap meta-characters in brackets for a literal + match (i.e. `[?]` to match the literal character `?`). For a path + to match a pattern, it must completely match from start to end, or + must match from the start to just before a path separator. Except + for the root path, paths will never end in the path separator when + matching is attempted. Thus, if a given pattern ends in a path + separator, a '*' is appended before matching is attempted. Shell-style patterns, selector `sh:` diff --git a/docs/usage/create.rst.inc b/docs/usage/create.rst.inc index 74c228b6c..632541d50 100644 --- a/docs/usage/create.rst.inc +++ b/docs/usage/create.rst.inc @@ -95,3 +95,4 @@ potentially decreases reliability of change detection, while avoiding always rea all files on these file systems. See the output of the "borg help patterns" command for more help on exclude patterns. +See the output of the "borg help placeholders" command for more help on placeholders. diff --git a/docs/usage/help.rst.inc b/docs/usage/help.rst.inc index b7ea093b1..9b8d23d66 100644 --- a/docs/usage/help.rst.inc +++ b/docs/usage/help.rst.inc @@ -1,3 +1,41 @@ +.. _borg_placeholders: + +borg help placeholders +~~~~~~~~~~~~~~~~~~~~~~ +:: + + +Repository (or Archive) URLs and --prefix values support these placeholders: + +{hostname} + + The (short) hostname of the machine. + +{fqdn} + + The full name of the machine. + +{now} + + The current local date and time. + +{utcnow} + + The current UTC date and time. + +{user} + + The user name (or UID, if no name is available) of the user running borg. + +{pid} + + The current process ID. + +Examples:: + + borg create /path/to/repo::{hostname}-{user}-{utcnow} ... + borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ... + borg prune --prefix '{hostname}-' ... .. _borg_patterns: borg help patterns @@ -6,26 +44,27 @@ borg help patterns Exclusion patterns support four separate styles, fnmatch, shell, regular -expressions and path prefixes. If followed by a colon (':') the first two -characters of a pattern are used as a style selector. Explicit style -selection is necessary when a non-default style is desired or when the -desired pattern starts with two alphanumeric characters followed by a colon -(i.e. `aa:something/*`). +expressions and path prefixes. By default, fnmatch is used. If followed +by a colon (':') the first two characters of a pattern are used as a +style selector. Explicit style selection is necessary when a +non-default style is desired or when the desired pattern starts with +two alphanumeric characters followed by a colon (i.e. `aa:something/*`). `Fnmatch `_, selector `fm:` - These patterns use a variant of shell pattern syntax, with '*' matching - any number of characters, '?' matching any single character, '[...]' - matching any single character specified, including ranges, and '[!...]' - matching any character not specified. For the purpose of these patterns, - the path separator ('\' for Windows and '/' on other systems) is not - treated specially. Wrap meta-characters in brackets for a literal match - (i.e. `[?]` to match the literal character `?`). For a path to match - a pattern, it must completely match from start to end, or must match from - the start to just before a path separator. Except for the root path, - paths will never end in the path separator when matching is attempted. - Thus, if a given pattern ends in a path separator, a '*' is appended - before matching is attempted. + This is the default style. These patterns use a variant of shell + pattern syntax, with '*' matching any number of characters, '?' + matching any single character, '[...]' matching any single + character specified, including ranges, and '[!...]' matching any + character not specified. For the purpose of these patterns, the + path separator ('\' for Windows and '/' on other systems) is not + treated specially. Wrap meta-characters in brackets for a literal + match (i.e. `[?]` to match the literal character `?`). For a path + to match a pattern, it must completely match from start to end, or + must match from the start to just before a path separator. Except + for the root path, paths will never end in the path separator when + matching is attempted. Thus, if a given pattern ends in a path + separator, a '*' is appended before matching is attempted. Shell-style patterns, selector `sh:` @@ -61,32 +100,32 @@ selector prefix is also supported for patterns loaded from a file. Due to whitespace removal paths with whitespace at the beginning or end can only be excluded using regular expressions. -Examples: +Examples:: -# Exclude '/home/user/file.o' but not '/home/user/file.odt': -$ borg create -e '*.o' backup / + # Exclude '/home/user/file.o' but not '/home/user/file.odt': + $ borg create -e '*.o' backup / -# Exclude '/home/user/junk' and '/home/user/subdir/junk' but -# not '/home/user/importantjunk' or '/etc/junk': -$ borg create -e '/home/*/junk' backup / + # Exclude '/home/user/junk' and '/home/user/subdir/junk' but + # not '/home/user/importantjunk' or '/etc/junk': + $ borg create -e '/home/*/junk' backup / -# Exclude the contents of '/home/user/cache' but not the directory itself: -$ borg create -e /home/user/cache/ backup / + # Exclude the contents of '/home/user/cache' but not the directory itself: + $ borg create -e /home/user/cache/ backup / -# The file '/home/user/cache/important' is *not* backed up: -$ borg create -e /home/user/cache/ backup / /home/user/cache/important + # The file '/home/user/cache/important' is *not* backed up: + $ borg create -e /home/user/cache/ backup / /home/user/cache/important -# The contents of directories in '/home' are not backed up when their name -# ends in '.tmp' -$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / + # The contents of directories in '/home' are not backed up when their name + # ends in '.tmp' + $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / -# Load exclusions from file -$ cat >exclude.txt <exclude.txt < Date: Sun, 3 Jul 2016 23:57:55 +0200 Subject: [PATCH 11/20] fixup rename --- borg/archive.py | 6 +++--- borg/testsuite/archive.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index bf65d49ee..4804df154 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -67,7 +67,7 @@ def backup_io(): raise BackupOSError(os_error) from os_error -def input_io_iter(iterator): +def backup_io_iter(iterator): while True: try: with backup_io(): @@ -552,7 +552,7 @@ Number of files: {0.stats.nfiles}'''.format( uid, gid = 0, 0 fd = sys.stdin.buffer # binary chunks = [] - for chunk in input_io_iter(self.chunker.chunkify(fd)): + for chunk in backup_io_iter(self.chunker.chunkify(fd)): chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) self.stats.nfiles += 1 t = int_to_bigint(int(time.time()) * 1000000000) @@ -604,7 +604,7 @@ Number of files: {0.stats.nfiles}'''.format( fh = Archive._open_rb(path) with os.fdopen(fh, 'rb') as fd: chunks = [] - for chunk in input_io_iter(self.chunker.chunkify(fd, fh)): + for chunk in backup_io_iter(self.chunker.chunkify(fd, fh)): chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) if self.show_progress: self.stats.show_progress(item=item, dt=0.2) diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 2b3961be5..2dbdd7bc8 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -5,7 +5,7 @@ import msgpack import pytest from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS -from ..archive import BackupOSError, backup_io, input_io_iter +from ..archive import BackupOSError, backup_io, backup_io_iter from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -164,9 +164,9 @@ def test_backup_io_iter(): oserror_iterator = Iterator(OSError) with pytest.raises(BackupOSError): - for _ in input_io_iter(oserror_iterator): + for _ in backup_io_iter(oserror_iterator): pass normal_iterator = Iterator(StopIteration) - for _ in input_io_iter(normal_iterator): + for _ in backup_io_iter(normal_iterator): assert False, 'StopIteration handled incorrectly' From 93f4b09d419d1d504c0be1bb3c8edb4b79112bbd Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 3 Jul 2016 23:58:12 +0200 Subject: [PATCH 12/20] BackupOSError documentation --- borg/archive.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 4804df154..ab4ddb0d9 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -47,7 +47,15 @@ flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) class BackupOSError(Exception): - """Wrapper for OSError raised while accessing input files.""" + """ + Wrapper for OSError raised while accessing backup files. + + Borg does different kinds of IO, and IO failures have different consequences. + This wrapper represents failures of input file or extraction IO. + These are non-critical and are only reported (exit code = 1, warning). + + Any unwrapped IO error is critical and aborts execution (for example repository IO failure). + """ def __init__(self, os_error): self.os_error = os_error self.errno = os_error.errno @@ -414,7 +422,7 @@ Number of files: {0.stats.nfiles}'''.format( def restore_attrs(self, path, item, symlink=False, fd=None): """ - Restore filesystem attributes on *path* from *item* (*fd*). + Restore filesystem attributes on *path* (*fd*) from *item*. Does not access the repository. """ From e7740458cd549b746b1b102d9459a4fa0fac1749 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Mon, 4 Jul 2016 00:44:25 +0200 Subject: [PATCH 13/20] Repository: Read v2 hints files Fixes #1235 --- borg/repository.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 275802751..fa3919f61 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -219,10 +219,14 @@ class Repository: self.io.cleanup(transaction_id) with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: hints = msgpack.unpack(fd) - if hints[b'version'] != 1: + hints_version = hints[b'version'] + if hints_version not in (1, 2): raise ValueError('Unknown hints file version: %d' % hints['version']) self.segments = hints[b'segments'] - self.compact = set(hints[b'compact']) + if hints_version == 1: + self.compact = set(hints[b'compact']) + elif hints_version == 2: + self.compact = set(hints[b'compact'].keys()) def write_index(self): hints = {b'version': 1, From f3aaffdb39a422ee7b64ba435b16e0f227d5b6dd Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Mon, 4 Jul 2016 00:55:12 +0200 Subject: [PATCH 14/20] Repository: fix hints file unknown version error handling bug --- borg/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/repository.py b/borg/repository.py index fa3919f61..972baab09 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -221,7 +221,7 @@ class Repository: hints = msgpack.unpack(fd) hints_version = hints[b'version'] if hints_version not in (1, 2): - raise ValueError('Unknown hints file version: %d' % hints['version']) + raise ValueError('Unknown hints file version: %d' % hints_version) self.segments = hints[b'segments'] if hints_version == 1: self.compact = set(hints[b'compact']) From 7c8783e7c2c785294589198231fa9e923733bce9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Jul 2016 18:47:22 +0200 Subject: [PATCH 15/20] Revert "in --read-special mode, follow symlinks, fixes #1215" This reverts commit c3073bacbb4850cf3a526ac08db42b05f3e33684. Needs to be solved differently, see following changesets. --- borg/archiver.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index c8d4b1355..9996ede5e 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -256,15 +256,7 @@ class Archiver: return try: - # usually, do not follow symlinks (if we have a symlink, we want to - # backup it as such). - # but if we are in --read-special mode, we later process as - # a regular file (we open and read the symlink target file's content). - # thus, in read_special mode, we also want to stat the symlink target - # file, for consistency. if we did not, we also have issues extracting - # this file, as it would be in the archive as a symlink, not as the - # target's file type (which could be e.g. a block device). - st = os.stat(path, follow_symlinks=read_special) + st = os.lstat(path) except OSError as e: self.print_warning('%s: %s', path, e) return From 58515d0f9502f6cb578f8be3ef0e5854f22803e9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Jul 2016 18:51:03 +0200 Subject: [PATCH 16/20] remove old implementation of --read-special this was a bit too simple, better implementation see next changesets. --- borg/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 9996ede5e..5d12bfc2d 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -269,7 +269,7 @@ class Archiver: # Ignore if nodump flag is set if has_lchflags and (st.st_flags & stat.UF_NODUMP): return - if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode): + if stat.S_ISREG(st.st_mode): if not dry_run: try: status = archive.process_file(path, st, cache, self.ignore_inode) From a3ef692132d394f5fbb3cca17cd6286c843db10e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Jul 2016 19:44:26 +0200 Subject: [PATCH 17/20] reimplement --read-special, fixes #1241 --- borg/archive.py | 16 +++++++++++++--- borg/archiver.py | 10 ++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index ab4ddb0d9..db16240fe 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -589,9 +589,16 @@ Number of files: {0.stats.nfiles}'''.format( return status else: self.hard_links[st.st_ino, st.st_dev] = safe_path - path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')) + is_regular_file = stat.S_ISREG(st.st_mode) + if is_regular_file: + path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')) + ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode) + else: + # in --read-special mode, we may be called for special files. + # there should be no information in the cache about special files processed in + # read-special mode, but we better play safe as this was wrong in the past: + path_hash = ids = None first_run = not cache.files - ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode) if first_run: logger.debug('Processing files ...') chunks = None @@ -616,7 +623,10 @@ Number of files: {0.stats.nfiles}'''.format( chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) if self.show_progress: self.stats.show_progress(item=item, dt=0.2) - cache.memorize_file(path_hash, st, [c[0] for c in chunks]) + if is_regular_file: + # we must not memorize special files, because the contents of e.g. a + # block or char device will change without its mtime/size/inode changing. + cache.memorize_file(path_hash, st, [c[0] for c in chunks]) status = status or 'M' # regular file, modified (if not 'A' already) item[b'chunks'] = chunks item.update(self.stat_attrs(st, path)) diff --git a/borg/archiver.py b/borg/archiver.py index 5d12bfc2d..85dc79f54 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -304,10 +304,16 @@ class Archiver: status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): if not dry_run: - status = archive.process_fifo(path, st) + if not read_special: + status = archive.process_fifo(path, st) + else: + status = archive.process_file(path, st, cache) elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): if not dry_run: - status = archive.process_dev(path, st) + if not read_special: + status = archive.process_dev(path, st) + else: + status = archive.process_file(path, st, cache) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return From 5476ece81e3282d1e06ceeea229f7504f8d59e3e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Jul 2016 20:17:07 +0200 Subject: [PATCH 18/20] fake regular file mode for --read-special mode, fixes #1214 --- borg/archive.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/borg/archive.py b/borg/archive.py index db16240fe..472190c6f 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -630,6 +630,10 @@ Number of files: {0.stats.nfiles}'''.format( status = status or 'M' # regular file, modified (if not 'A' already) item[b'chunks'] = chunks item.update(self.stat_attrs(st, path)) + if not is_regular_file: + # we processed a special file like a regular file. reflect that in mode, + # so it can be extracted / accessed in fuse mount like a regular file: + item[b'mode'] = stat.S_IFREG | stat.S_IMODE(item[b'mode']) self.stats.nfiles += 1 self.add_item(item) return status From 00a5470125719b49c82cf8e934a1933f095d79bc Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Jul 2016 21:04:51 +0200 Subject: [PATCH 19/20] symlink processing for --read-special mode processing depends on symlink target: - if target is a special file: process the symlink as a regular file - if target is anything else: process the symlink as symlink refactor code a little to avoid duplication. --- borg/archive.py | 15 ++++++++++----- borg/archiver.py | 11 +++++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 472190c6f..1ce93ab5d 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -46,6 +46,11 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0) flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) +def is_special(mode): + # file types that get special treatment in --read-special mode + return stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode) + + class BackupOSError(Exception): """ Wrapper for OSError raised while accessing backup files. @@ -589,8 +594,8 @@ Number of files: {0.stats.nfiles}'''.format( return status else: self.hard_links[st.st_ino, st.st_dev] = safe_path - is_regular_file = stat.S_ISREG(st.st_mode) - if is_regular_file: + is_special_file = is_special(st.st_mode) + if not is_special_file: path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')) ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode) else: @@ -623,16 +628,16 @@ Number of files: {0.stats.nfiles}'''.format( chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) if self.show_progress: self.stats.show_progress(item=item, dt=0.2) - if is_regular_file: + if not is_special_file: # we must not memorize special files, because the contents of e.g. a # block or char device will change without its mtime/size/inode changing. cache.memorize_file(path_hash, st, [c[0] for c in chunks]) status = status or 'M' # regular file, modified (if not 'A' already) item[b'chunks'] = chunks item.update(self.stat_attrs(st, path)) - if not is_regular_file: + if is_special_file: # we processed a special file like a regular file. reflect that in mode, - # so it can be extracted / accessed in fuse mount like a regular file: + # so it can be extracted / accessed in FUSE mount like a regular file: item[b'mode'] = stat.S_IFREG | stat.S_IMODE(item[b'mode']) self.stats.nfiles += 1 self.add_item(item) diff --git a/borg/archiver.py b/borg/archiver.py index 85dc79f54..ecc3fc21a 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -29,7 +29,7 @@ from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader from .repository import Repository from .cache import Cache from .key import key_creator, RepoKey, PassphraseKey -from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS +from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS, is_special from .remote import RepositoryServer, RemoteRepository, cache_if_remote has_lchflags = hasattr(os, 'lchflags') @@ -301,7 +301,14 @@ class Archiver: read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): if not dry_run: - status = archive.process_symlink(path, st) + if not read_special: + status = archive.process_symlink(path, st) + else: + st_target = os.stat(path) + if is_special(st_target.st_mode): + status = archive.process_file(path, st_target, cache) + else: + status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): if not dry_run: if not read_special: From d59a1d5fefd17f57ed70203173bca7a395c0d09d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Jul 2016 21:34:45 +0200 Subject: [PATCH 20/20] doc and help update for --read-special --- borg/archiver.py | 3 ++- docs/usage.rst | 34 ++++++++++++++++++---------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index ecc3fc21a..49e0bdbaa 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1139,7 +1139,8 @@ class Archiver: 'lzma,0 .. lzma,9 == lzma (with level 0..9).') subparser.add_argument('--read-special', dest='read_special', action='store_true', default=False, - help='open and read special files as if they were regular files') + help='open and read block and char device files as well as FIFOs as if they were ' + 'regular files. Also follows symlinks pointing to these kinds of files.') subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', default=False, help='do not create a backup archive') diff --git a/docs/usage.rst b/docs/usage.rst index e1a30060b..ca8d536db 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -651,32 +651,34 @@ For more details, see :ref:`chunker_details`. --read-special ~~~~~~~~~~~~~~ -The option ``--read-special`` is not intended for normal, filesystem-level (full or -partly-recursive) backups. You only give this option if you want to do something -rather ... special -- and if you have hand-picked some files that you want to treat -that way. +The --read-special option is special - you do not want to use it for normal +full-filesystem backups, but rather after carefully picking some targets for it. -``borg create --read-special`` will open all files without doing any special -treatment according to the file type (the only exception here are directories: -they will be recursed into). Just imagine what happens if you do ``cat -filename`` --- the content you will see there is what borg will backup for that -filename. +The option ``--read-special`` triggers special treatment for block and char +device files as well as FIFOs. Instead of storing them as such a device (or +FIFO), they will get opened, their content will be read and in the backup +archive they will show up like a regular file. -So, for example, symlinks will be followed, block device content will be read, -named pipes / UNIX domain sockets will be read. +Symlinks will also get special treatment if (and only if) they point to such +a special file: instead of storing them as a symlink, the target special file +will get processed as described above. -You need to be careful with what you give as filename when using ``--read-special``, -e.g. if you give ``/dev/zero``, your backup will never terminate. +One intended use case of this is backing up the contents of one or multiple +block devices, like e.g. LVM snapshots or inactive LVs or disk partitions. -The given files' metadata is saved as it would be saved without -``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but -additionally, also the content read from it will be saved for it. +You need to be careful about what you include when using ``--read-special``, +e.g. if you include ``/dev/zero``, your backup will never terminate. Restoring such files' content is currently only supported one at a time via ``--stdout`` option (and you have to redirect stdout to where ever it shall go, maybe directly into an existing device file of your choice or indirectly via ``dd``). +To some extent, mounting a backup archive with the backups of special files +via ``borg mount`` and then loop-mounting the image files from inside the mount +point will work. If you plan to access a lot of data in there, it likely will +scale and perform better if you do not work via the FUSE mount. + Example +++++++