diff --git a/.gitignore b/.gitignore index 059bf2d37..935517fd7 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ borg.dist/ borg.exe .coverage .vagrant +.eggs diff --git a/docs/usage.rst b/docs/usage.rst index 8b06430fd..0fab01d26 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -738,32 +738,34 @@ For more details, see :ref:`chunker_details`. --read-special ~~~~~~~~~~~~~~ -The option ``--read-special`` is not intended for normal, filesystem-level (full or -partly-recursive) backups. You only give this option if you want to do something -rather ... special -- and if you have hand-picked some files that you want to treat -that way. +The --read-special option is special - you do not want to use it for normal +full-filesystem backups, but rather after carefully picking some targets for it. -``borg create --read-special`` will open all files without doing any special -treatment according to the file type (the only exception here are directories: -they will be recursed into). Just imagine what happens if you do ``cat -filename`` --- the content you will see there is what borg will backup for that -filename. +The option ``--read-special`` triggers special treatment for block and char +device files as well as FIFOs. Instead of storing them as such a device (or +FIFO), they will get opened, their content will be read and in the backup +archive they will show up like a regular file. -So, for example, symlinks will be followed, block device content will be read, -named pipes / UNIX domain sockets will be read. +Symlinks will also get special treatment if (and only if) they point to such +a special file: instead of storing them as a symlink, the target special file +will get processed as described above. -You need to be careful with what you give as filename when using ``--read-special``, -e.g. if you give ``/dev/zero``, your backup will never terminate. +One intended use case of this is backing up the contents of one or multiple +block devices, like e.g. 
LVM snapshots or inactive LVs or disk partitions. -The given files' metadata is saved as it would be saved without -``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but -additionally, also the content read from it will be saved for it. +You need to be careful about what you include when using ``--read-special``, +e.g. if you include ``/dev/zero``, your backup will never terminate. Restoring such files' content is currently only supported one at a time via ``--stdout`` option (and you have to redirect stdout to where ever it shall go, maybe directly into an existing device file of your choice or indirectly via ``dd``). +To some extent, mounting a backup archive with the backups of special files +via ``borg mount`` and then loop-mounting the image files from inside the mount +point will work. If you plan to access a lot of data in there, it likely will +scale and perform better if you do not work via the FUSE mount. + Example +++++++ @@ -817,6 +819,13 @@ To activate append-only mode, edit the repository ``config`` file and add a line In append-only mode Borg will create a transaction log in the ``transactions`` file, where each line is a transaction and a UTC timestamp. +In addition, ``borg serve`` can act as if a repository is in append-only mode with +its option ``--append-only``. This can be very useful for fine-tuning access control +in ``.ssh/authorized_keys`` :: + + command="borg serve --append-only ..." ssh-rsa + command="borg serve ..." ssh-rsa + Example +++++++ diff --git a/docs/usage/create.rst.inc b/docs/usage/create.rst.inc index 9389249f2..7b49f109f 100644 --- a/docs/usage/create.rst.inc +++ b/docs/usage/create.rst.inc @@ -87,3 +87,4 @@ potentially decreases reliability of change detection, while avoiding always rea all files on these file systems. See the output of the "borg help patterns" command for more help on exclude patterns. +See the output of the "borg help placeholders" command for more help on placeholders. 
diff --git a/docs/usage/help.rst.inc b/docs/usage/help.rst.inc index b7ea093b1..9b8d23d66 100644 --- a/docs/usage/help.rst.inc +++ b/docs/usage/help.rst.inc @@ -1,3 +1,41 @@ +.. _borg_placeholders: + +borg help placeholders +~~~~~~~~~~~~~~~~~~~~~~ +:: + + +Repository (or Archive) URLs and --prefix values support these placeholders: + +{hostname} + + The (short) hostname of the machine. + +{fqdn} + + The full name of the machine. + +{now} + + The current local date and time. + +{utcnow} + + The current UTC date and time. + +{user} + + The user name (or UID, if no name is available) of the user running borg. + +{pid} + + The current process ID. + +Examples:: + + borg create /path/to/repo::{hostname}-{user}-{utcnow} ... + borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ... + borg prune --prefix '{hostname}-' ... .. _borg_patterns: borg help patterns @@ -6,26 +44,27 @@ borg help patterns Exclusion patterns support four separate styles, fnmatch, shell, regular -expressions and path prefixes. If followed by a colon (':') the first two -characters of a pattern are used as a style selector. Explicit style -selection is necessary when a non-default style is desired or when the -desired pattern starts with two alphanumeric characters followed by a colon -(i.e. `aa:something/*`). +expressions and path prefixes. By default, fnmatch is used. If followed +by a colon (':') the first two characters of a pattern are used as a +style selector. Explicit style selection is necessary when a +non-default style is desired or when the desired pattern starts with +two alphanumeric characters followed by a colon (i.e. `aa:something/*`). `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:` - These patterns use a variant of shell pattern syntax, with '*' matching - any number of characters, '?' matching any single character, '[...]' - matching any single character specified, including ranges, and '[!...]' - matching any character not specified. 
For the purpose of these patterns, - the path separator ('\' for Windows and '/' on other systems) is not - treated specially. Wrap meta-characters in brackets for a literal match - (i.e. `[?]` to match the literal character `?`). For a path to match - a pattern, it must completely match from start to end, or must match from - the start to just before a path separator. Except for the root path, - paths will never end in the path separator when matching is attempted. - Thus, if a given pattern ends in a path separator, a '*' is appended - before matching is attempted. + This is the default style. These patterns use a variant of shell + pattern syntax, with '*' matching any number of characters, '?' + matching any single character, '[...]' matching any single + character specified, including ranges, and '[!...]' matching any + character not specified. For the purpose of these patterns, the + path separator ('\' for Windows and '/' on other systems) is not + treated specially. Wrap meta-characters in brackets for a literal + match (i.e. `[?]` to match the literal character `?`). For a path + to match a pattern, it must completely match from start to end, or + must match from the start to just before a path separator. Except + for the root path, paths will never end in the path separator when + matching is attempted. Thus, if a given pattern ends in a path + separator, a '*' is appended before matching is attempted. Shell-style patterns, selector `sh:` @@ -61,32 +100,32 @@ selector prefix is also supported for patterns loaded from a file. Due to whitespace removal paths with whitespace at the beginning or end can only be excluded using regular expressions. 
-Examples: +Examples:: -# Exclude '/home/user/file.o' but not '/home/user/file.odt': -$ borg create -e '*.o' backup / + # Exclude '/home/user/file.o' but not '/home/user/file.odt': + $ borg create -e '*.o' backup / -# Exclude '/home/user/junk' and '/home/user/subdir/junk' but -# not '/home/user/importantjunk' or '/etc/junk': -$ borg create -e '/home/*/junk' backup / + # Exclude '/home/user/junk' and '/home/user/subdir/junk' but + # not '/home/user/importantjunk' or '/etc/junk': + $ borg create -e '/home/*/junk' backup / -# Exclude the contents of '/home/user/cache' but not the directory itself: -$ borg create -e /home/user/cache/ backup / + # Exclude the contents of '/home/user/cache' but not the directory itself: + $ borg create -e /home/user/cache/ backup / -# The file '/home/user/cache/important' is *not* backed up: -$ borg create -e /home/user/cache/ backup / /home/user/cache/important + # The file '/home/user/cache/important' is *not* backed up: + $ borg create -e /home/user/cache/ backup / /home/user/cache/important -# The contents of directories in '/home' are not backed up when their name -# ends in '.tmp' -$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / + # The contents of directories in '/home' are not backed up when their name + # ends in '.tmp' + $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / -# Load exclusions from file -$ cat >exclude.txt <exclude.txt < compression %s', path, compress['name']) - with input_io(): + with backup_io(): fh = Archive._open_rb(path) with os.fdopen(fh, 'rb') as fd: chunks = [] - for data in input_io_iter(self.chunker.chunkify(fd, fh)): + for data in backup_io_iter(self.chunker.chunkify(fd, fh)): chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data, compress=compress), self.stats)) if self.show_progress: self.stats.show_progress(item=item, dt=0.2) - cache.memorize_file(path_hash, st, [c.id for c in chunks]) + if not is_special_file: + # we must not memorize special files, because the contents of 
e.g. a + # block or char device will change without its mtime/size/inode changing. + cache.memorize_file(path_hash, st, [c.id for c in chunks]) status = status or 'M' # regular file, modified (if not 'A' already) item.chunks = chunks item.update(self.stat_attrs(st, path)) + if is_special_file: + # we processed a special file like a regular file. reflect that in mode, + # so it can be extracted / accessed in FUSE mount like a regular file: + item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) self.stats.nfiles += 1 self.add_item(item) return status diff --git a/src/borg/archiver.py b/src/borg/archiver.py index d92b802aa..04329c83b 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -23,8 +23,8 @@ logger = create_logger() from . import __version__ from . import helpers -from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics -from .archive import InputOSError, CHUNKER_PARAMS +from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special +from .archive import BackupOSError, CHUNKER_PARAMS from .cache import Cache from .constants import * # NOQA from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR @@ -164,7 +164,7 @@ class Archiver: def do_serve(self, args): """Start in server mode. This command is usually not used manually. """ - return RepositoryServer(restrict_to_paths=args.restrict_to_paths).serve() + return RepositoryServer(restrict_to_paths=args.restrict_to_paths, append_only=args.append_only).serve() @with_repository(create=True, exclusive=True, manifest=False) def do_init(self, args, repository): @@ -255,7 +255,7 @@ class Archiver: if not dry_run: try: status = archive.process_stdin(path, cache) - except InputOSError as e: + except BackupOSError as e: status = 'E' self.print_warning('%s: %s', path, e) else: @@ -313,15 +313,7 @@ class Archiver: return if st is None: try: - # usually, do not follow symlinks (if we have a symlink, we want to - # backup it as such). 
- # but if we are in --read-special mode, we later process as - # a regular file (we open and read the symlink target file's content). - # thus, in read_special mode, we also want to stat the symlink target - # file, for consistency. if we did not, we also have issues extracting - # this file, as it would be in the archive as a symlink, not as the - # target's file type (which could be e.g. a block device). - st = os.stat(path, follow_symlinks=read_special) + st = os.lstat(path) except OSError as e: self.print_warning('%s: %s', path, e) return @@ -335,11 +327,11 @@ class Archiver: if get_flags(path, st) & stat.UF_NODUMP: self.print_file_status('x', path) return - if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode): + if stat.S_ISREG(st.st_mode): if not dry_run: try: status = archive.process_file(path, st, cache, self.ignore_inode) - except InputOSError as e: + except BackupOSError as e: status = 'E' self.print_warning('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): @@ -367,13 +359,26 @@ class Archiver: read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): if not dry_run: - status = archive.process_symlink(path, st) + if not read_special: + status = archive.process_symlink(path, st) + else: + st_target = os.stat(path) + if is_special(st_target.st_mode): + status = archive.process_file(path, st_target, cache) + else: + status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): if not dry_run: - status = archive.process_fifo(path, st) + if not read_special: + status = archive.process_fifo(path, st) + else: + status = archive.process_file(path, st, cache) elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): if not dry_run: - status = archive.process_dev(path, st) + if not read_special: + status = archive.process_dev(path, st) + else: + status = archive.process_file(path, st, cache) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return @@ -432,7 +437,11 @@ class Archiver: continue if not 
args.dry_run: while dirs and not item.path.startswith(dirs[-1].path): - archive.extract_item(dirs.pop(-1), stdout=stdout) + dir_item = dirs.pop(-1) + try: + archive.extract_item(dir_item, stdout=stdout) + except BackupOSError as e: + self.print_warning('%s: %s', remove_surrogates(dir_item[b'path']), e) if output_list: logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) try: @@ -445,12 +454,16 @@ class Archiver: else: archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters, original_path=orig_path) - except OSError as e: + except BackupOSError as e: self.print_warning('%s: %s', remove_surrogates(orig_path), e) if not args.dry_run: while dirs: - archive.extract_item(dirs.pop(-1)) + dir_item = dirs.pop(-1) + try: + archive.extract_item(dir_item) + except BackupOSError as e: + self.print_warning('%s: %s', remove_surrogates(dir_item[b'path']), e) for pattern in include_patterns: if pattern.match_count == 0: self.print_warning("Include pattern '%s' never matched.", pattern) @@ -1033,26 +1046,27 @@ class Archiver: helptext = {} helptext['patterns'] = textwrap.dedent(''' Exclusion patterns support four separate styles, fnmatch, shell, regular - expressions and path prefixes. If followed by a colon (':') the first two - characters of a pattern are used as a style selector. Explicit style - selection is necessary when a non-default style is desired or when the - desired pattern starts with two alphanumeric characters followed by a colon - (i.e. `aa:something/*`). + expressions and path prefixes. By default, fnmatch is used. If followed + by a colon (':') the first two characters of a pattern are used as a + style selector. Explicit style selection is necessary when a + non-default style is desired or when the desired pattern starts with + two alphanumeric characters followed by a colon (i.e. `aa:something/*`). 
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:` - These patterns use a variant of shell pattern syntax, with '*' matching - any number of characters, '?' matching any single character, '[...]' - matching any single character specified, including ranges, and '[!...]' - matching any character not specified. For the purpose of these patterns, - the path separator ('\\' for Windows and '/' on other systems) is not - treated specially. Wrap meta-characters in brackets for a literal match - (i.e. `[?]` to match the literal character `?`). For a path to match - a pattern, it must completely match from start to end, or must match from - the start to just before a path separator. Except for the root path, - paths will never end in the path separator when matching is attempted. - Thus, if a given pattern ends in a path separator, a '*' is appended - before matching is attempted. + This is the default style. These patterns use a variant of shell + pattern syntax, with '*' matching any number of characters, '?' + matching any single character, '[...]' matching any single + character specified, including ranges, and '[!...]' matching any + character not specified. For the purpose of these patterns, the + path separator ('\\' for Windows and '/' on other systems) is not + treated specially. Wrap meta-characters in brackets for a literal + match (i.e. `[?]` to match the literal character `?`). For a path + to match a pattern, it must completely match from start to end, or + must match from the start to just before a path separator. Except + for the root path, paths will never end in the path separator when + matching is attempted. Thus, if a given pattern ends in a path + separator, a '*' is appended before matching is attempted. 
Shell-style patterns, selector `sh:` @@ -1229,6 +1243,8 @@ class Archiver: subparser.set_defaults(func=self.do_serve) subparser.add_argument('--restrict-to-path', dest='restrict_to_paths', action='append', metavar='PATH', help='restrict repository access to PATH') + subparser.add_argument('--append-only', dest='append_only', action='store_true', + help='only allow appending to repository segment files') init_epilog = textwrap.dedent(""" This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. @@ -1485,7 +1501,8 @@ class Archiver: help='ignore inode data in the file metadata cache used to detect unchanged files.') fs_group.add_argument('--read-special', dest='read_special', action='store_true', default=False, - help='open and read special files as if they were regular files') + help='open and read block and char device files as well as FIFOs as if they were ' + 'regular files. Also follows symlinks pointing to these kinds of files.') archive_group = subparser.add_argument_group('Archive options') archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default='', @@ -2123,8 +2140,9 @@ class Archiver: if result.func != forced_result.func: # someone is trying to execute a different borg subcommand, don't do that! 
return forced_result - # the only thing we take from the forced "borg serve" ssh command is --restrict-to-path + # we only take specific options from the forced "borg serve" command: result.restrict_to_paths = forced_result.restrict_to_paths + result.append_only = forced_result.append_only return result def parse_args(self, args=None): diff --git a/src/borg/remote.py b/src/borg/remote.py index fb67e1d57..39b325fd4 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -58,9 +58,10 @@ class RepositoryServer: # pragma: no cover 'break_lock', ) - def __init__(self, restrict_to_paths): + def __init__(self, restrict_to_paths, append_only): self.repository = None self.restrict_to_paths = restrict_to_paths + self.append_only = append_only def serve(self): stdin_fd = sys.stdin.fileno() @@ -127,7 +128,7 @@ class RepositoryServer: # pragma: no cover break else: raise PathNotAllowed(path) - self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock) + self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock, append_only=self.append_only) self.repository.__enter__() # clean exit handled by serve() method return self.repository.id @@ -192,9 +193,14 @@ class RemoteRepository: return self def __exit__(self, exc_type, exc_val, exc_tb): - if exc_type is not None: - self.rollback() - self.close() + try: + if exc_type is not None: + self.rollback() + finally: + # in any case, we want to cleanly close the repo, even if the + # rollback can not succeed (e.g. 
because the connection was + # already closed) and raised another exception: + self.close() @property def id_str(self): diff --git a/src/borg/repository.py b/src/borg/repository.py index aac8af066..526db9e4e 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -96,7 +96,7 @@ class Repository: class ObjectNotFound(ErrorWithTraceback): """Object with key {} not found in repository {}.""" - def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True): + def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False): self.path = os.path.abspath(path) self._location = Location('file://%s' % self.path) self.io = None @@ -107,6 +107,7 @@ class Repository: self.do_lock = lock self.do_create = create self.exclusive = exclusive + self.append_only = append_only def __del__(self): if self.lock: @@ -219,7 +220,9 @@ class Repository: raise self.InvalidRepository(path) self.max_segment_size = self.config.getint('repository', 'max_segment_size') self.segments_per_dir = self.config.getint('repository', 'segments_per_dir') - self.append_only = self.config.getboolean('repository', 'append_only', fallback=False) + # append_only can be set in the constructor + # it shouldn't be overridden (True -> False) here + self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False) self.id = unhexlify(self.config.get('repository', 'id').strip()) self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir) diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index ee4a86c32..527f7bde2 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -7,7 +7,7 @@ import pytest import msgpack from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics -from ..archive import InputOSError, input_io, input_io_iter +from ..archive import BackupOSError, backup_io, backup_io_iter from 
..item import Item from ..key import PlaintextKey from ..helpers import Manifest @@ -219,13 +219,13 @@ def test_key_length_msgpacked_items(): assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized) -def test_input_io(): - with pytest.raises(InputOSError): - with input_io(): +def test_backup_io(): + with pytest.raises(BackupOSError): + with backup_io(): raise OSError(123) -def test_input_io_iter(): +def test_backup_io_iter(): class Iterator: def __init__(self, exc): self.exc = exc @@ -234,10 +234,10 @@ def test_input_io_iter(): raise self.exc() oserror_iterator = Iterator(OSError) - with pytest.raises(InputOSError): - for _ in input_io_iter(oserror_iterator): + with pytest.raises(BackupOSError): + for _ in backup_io_iter(oserror_iterator): pass normal_iterator = Iterator(StopIteration) - for _ in input_io_iter(normal_iterator): + for _ in backup_io_iter(normal_iterator): assert False, 'StopIteration handled incorrectly' diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index 88eb5389a..48ccf16fa 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -244,11 +244,14 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): + def open(self, create=False): + return Repository(os.path.join(self.tmppath, 'repository'), create=create, append_only=True) + def test_destroy_append_only(self): # Can't destroy append only repo (via the API) - self.repository.append_only = True with self.assert_raises(ValueError): self.repository.destroy() + assert self.repository.append_only def test_append_only(self): def segments_in_repository():