add two new options --pattern and --patterns-from as discussed in #1406

This commit is contained in:
Alexander 'Leo' Bergolth 2016-08-02 16:02:02 +02:00
parent d5bc486dc9
commit 876b670d60
3 changed files with 240 additions and 42 deletions

View file

@ -18,9 +18,9 @@ import collections
from . import __version__
from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, \
get_cache_dir, prune_within, prune_split, \
Manifest, NoManifestError, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
parse_pattern, parse_exclude_pattern, parse_inclexcl_pattern, PathPrefixPattern, to_localtime, timestamp, \
safe_timestamp, bin_to_hex, get_cache_dir, prune_within, prune_split, \
Manifest, NoManifestError, remove_surrogates, update_patterns, format_archive, check_extension_modules, Statistics, \
dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, PrefixSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ErrorIgnoringTextIOWrapper
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
@ -121,6 +121,18 @@ class Archiver:
if self.output_list and (self.output_filter is None or status in self.output_filter):
logger.info("%1s %s", status, remove_surrogates(path))
@staticmethod
def build_matcher(inclexcl_patterns, paths):
    """Create the PatternMatcher for the given include/exclude rules and paths.

    ``inclexcl_patterns`` (if any) are registered first via ``add_inclexcl``.
    Every entry of ``paths`` is then parsed as a path-prefix pattern and
    registered with the value True. The matcher's fallback is set to True
    only when no path pattern was produced.

    Returns a tuple ``(matcher, include_patterns)`` where ``include_patterns``
    is the list of patterns built from ``paths`` (empty if there were none).
    """
    matcher = PatternMatcher()
    if inclexcl_patterns:
        matcher.add_inclexcl(inclexcl_patterns)

    include_patterns = []
    if paths:
        include_patterns = [parse_pattern(path, PathPrefixPattern) for path in paths]
        matcher.add(include_patterns, True)

    # Without explicit paths everything not matched by a rule is selected.
    matcher.fallback = not include_patterns
    return matcher, include_patterns
def do_serve(self, args):
"""Start in server mode. This command is usually not used manually.
"""
@ -233,8 +245,7 @@ class Archiver:
def do_create(self, args, repository, manifest=None, key=None):
"""Create new archive"""
matcher = PatternMatcher(fallback=True)
if args.excludes:
matcher.add(args.excludes, False)
matcher.add_inclexcl(args.pattern)
def create_inner(archive, cache):
# Add cache dir to inode_skip list
@ -424,17 +435,7 @@ class Archiver:
if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
matcher = PatternMatcher()
if args.excludes:
matcher.add(args.excludes, False)
include_patterns = []
if args.paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
matcher.add(include_patterns, True)
matcher.fallback = not include_patterns
matcher, include_patterns = self.build_matcher(args.pattern, args.paths)
output_list = args.output_list
dry_run = args.dry_run
@ -893,8 +894,9 @@ class Archiver:
helptext = collections.OrderedDict()
helptext['patterns'] = textwrap.dedent('''
Exclusion patterns support four separate styles, fnmatch, shell, regular
expressions and path prefixes. By default, fnmatch is used. If followed
File patterns support four separate styles, fnmatch, shell, regular
expressions and path prefixes. By default, fnmatch is used for
`--exclude` patterns and shell-style is used for `--pattern`. If followed
by a colon (':') the first two characters of a pattern are used as a
style selector. Explicit style selection is necessary when a
non-default style is desired or when the desired pattern starts with
@ -902,12 +904,12 @@ class Archiver:
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
This is the default style. These patterns use a variant of shell
pattern syntax, with '*' matching any number of characters, '?'
matching any single character, '[...]' matching any single
character specified, including ranges, and '[!...]' matching any
character not specified. For the purpose of these patterns, the
path separator ('\\' for Windows and '/' on other systems) is not
This is the default style for --exclude and --exclude-from.
These patterns use a variant of shell pattern syntax, with '*' matching
any number of characters, '?' matching any single character, '[...]'
matching any single character specified, including ranges, and '[!...]'
matching any character not specified. For the purpose of these patterns,
the path separator ('\\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal
match (i.e. `[?]` to match the literal character `?`). For a path
to match a pattern, it must completely match from start to end, or
@ -918,6 +920,7 @@ class Archiver:
Shell-style patterns, selector `sh:`
This is the default style for --pattern and --patterns-from.
Like fnmatch patterns these are similar to shell patterns. The difference
is that the pattern may include `**/` for matching zero or more directory
levels, `*` for matching zero or more arbitrary characters with the
@ -978,7 +981,44 @@ class Archiver:
re:^/home/[^/]\.tmp/
sh:/home/*/.thumbnails
EOF
$ borg create --exclude-from exclude.txt backup /\n\n''')
$ borg create --exclude-from exclude.txt backup /
# exclude the contents of /data/docs/ but not /data/docs/pdf
$ borg create -e +/data/docs/pdf -e /data/docs/ backup /
# equivalent:
$ borg create -e +pm:/data/docs/pdf -e -pm:/data/docs/ backup /
A more general way to define filename matching patterns may be passed via
`--pattern` and `--patterns-from`. Using these options, you may specify the
backup roots (starting points) and patterns for inclusion/exclusion. A
root path starts with the prefix `R`, followed by a path (a plain path, not a
file pattern). An include rule is specified by `+` followed by a pattern.
Exclude rules start with a `-`.
Inclusion patterns are useful to e.g. exclude the contents of a directory
except for some important files in this directory. The first matching pattern
is used so if an include pattern matches before an exclude pattern, the file
is backed up.
Note that the default pattern style for `--pattern` and `--patterns-from` is
shell style (`sh:`), so those patterns behave like rsync include/exclude patterns.
An example `--patterns-from` file could look like that::
R /
# can be rebuild
- /home/*/.cache
# they're downloads for a reason
- /home/*/Downloads
# susan is a nice person
# include susans home
+ /home/susan
# ... and its contents
+ /home/susan/*
# don't backup the other home directories
- /home/*
\n\n''')
helptext['placeholders'] = textwrap.dedent('''
Repository (or Archive) URLs, --prefix and --remote-path values support these
placeholders:
@ -1339,10 +1379,10 @@ class Archiver:
subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
help='only display items with the given status characters')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
type=parse_exclude_pattern, action='append', dest='pattern',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
type=argparse.FileType('r'), action='append', default=[],
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False,
@ -1353,6 +1393,13 @@ class Archiver:
subparser.add_argument('--keep-tag-files', dest='keep_tag_files',
action='store_true', default=False,
help='keep tag files of excluded caches/directories')
subparser.add_argument('--pattern', dest='pattern',
type=parse_inclexcl_pattern, action='append',
metavar="PATTERN", help='include/exclude paths matching PATTERN')
subparser.set_defaults(pattern=[])
subparser.add_argument('--patterns-from', dest='pattern_files',
type=argparse.FileType('r'), action='append', default=[],
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
type=int, default=300, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 300)')
@ -1423,11 +1470,18 @@ class Archiver:
default=False, action='store_true',
help='do not actually change any files')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
type=parse_exclude_pattern, action='append', dest='pattern',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
type=argparse.FileType('r'), action='append', default=[],
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--pattern', dest='pattern',
type=parse_inclexcl_pattern, action='append',
metavar="PATTERN", help='include/exclude paths matching PATTERN')
subparser.set_defaults(pattern=[])
subparser.add_argument('--patterns-from', dest='pattern_files',
type=argparse.FileType('r'), action='append', default=[],
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
subparser.add_argument('--numeric-owner', dest='numeric_owner',
action='store_true', default=False,
help='only obey numeric user and group identifiers')
@ -1982,7 +2036,7 @@ class Archiver:
args = self.preprocess_args(args)
parser = self.build_parser(args)
args = parser.parse_args(args or ['-h'])
update_excludes(args)
update_patterns(args)
return args
def run(self, args):

View file

@ -312,17 +312,37 @@ def load_excludes(fh):
both line ends are ignored.
"""
patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
return [parse_pattern(pattern) for pattern in patterns if pattern]
return [parse_exclude_pattern(pattern)
for pattern in patterns if pattern]
def update_excludes(args):
    """Append the patterns read from ``args.exclude_files`` to ``args.excludes``.

    Does nothing when ``args`` has no (or an empty) ``exclude_files``
    attribute. ``args.excludes`` is created as an empty list if it is missing
    or None. Each file is closed after it has been read.
    """
    exclude_files = getattr(args, 'exclude_files', None)
    if not exclude_files:
        return
    if getattr(args, 'excludes', None) is None:
        args.excludes = []
    for fh in exclude_files:
        args.excludes += load_excludes(fh)
        fh.close()
def load_patterns(fh):
    """Load and parse include/exclude/root patterns from file object.

    Lines empty or starting with '#' after stripping whitespace on both line
    ends are ignored.

    Returns a tuple ``(roots, inclexclpatterns)``: ``roots`` is the list of
    root path strings (entries whose type is RootPath), ``inclexclpatterns``
    the list of all other parsed InclExclPattern entries, in file order.

    Any exception raised by parse_inclexcl_pattern for a malformed line is
    propagated to the caller.
    """
    roots = []
    inclexclpatterns = []
    for line in fh:
        patternline = line.strip()
        # Blank lines must be filtered here: parse_inclexcl_pattern rejects an
        # empty string, which would turn a harmless empty line into an error.
        if not patternline or patternline.startswith('#'):
            continue
        pattern = parse_inclexcl_pattern(patternline)
        if pattern.ptype is RootPath:
            roots.append(pattern.pattern)
        else:
            inclexclpatterns.append(pattern)
    return roots, inclexclpatterns
def update_patterns(args):
    """Merge patterns from exclude- and pattern-files with those on command line.

    Patterns read from ``args.pattern_files`` are processed first: root
    entries are appended to ``args.paths``, all other entries to
    ``args.pattern``. Patterns read from ``args.exclude_files`` are appended
    to ``args.pattern`` afterwards.

    Missing or None ``pattern_files`` / ``exclude_files`` attributes are
    treated as "no files", so this can be called on any parsed namespace.
    ``args.pattern`` and ``args.paths`` are created on demand if missing or None.

    All given files are closed, also when parsing one of them raises.
    """
    pattern_files = getattr(args, 'pattern_files', None) or []
    exclude_files = getattr(args, 'exclude_files', None) or []
    if not pattern_files and not exclude_files:
        return
    if getattr(args, 'pattern', None) is None:
        args.pattern = []
    try:
        for file in pattern_files:
            roots, inclexclpatterns = load_patterns(file)
            if roots:
                if getattr(args, 'paths', None) is None:
                    args.paths = []
                args.paths += roots
            args.pattern += inclexclpatterns
            file.close()
        for file in exclude_files:
            args.pattern += load_excludes(file)
            file.close()
    finally:
        # Closing an already closed file is a no-op; this only matters when a
        # parse error aborted the loops above.
        for file in list(pattern_files) + list(exclude_files):
            file.close()
class PatternMatcher:
@ -338,6 +358,12 @@ class PatternMatcher:
"""
self._items.extend((i, value) for i in patterns)
def add_inclexcl(self, patterns):
    """Append InclExclPattern entries to the internal item list as they are.

    match() unpacks every stored item as a (pattern, value) pair, so for the
    entries added here the ptype member takes the place of the value.
    """
    items = self._items
    items.extend(patterns)
def match(self, path):
for (pattern, value) in self._items:
if pattern.match(path):
@ -489,6 +515,8 @@ _PATTERN_STYLES = set([
_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
# Pairs a parsed pattern object (or, for root entries, the plain path string)
# with its type marker: True for include, False for exclude, RootPath for roots.
InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype')
# Unique sentinel used as the ptype of root path entries; compared by identity.
RootPath = object()
def parse_pattern(pattern, fallback=FnmatchPattern):
"""Read pattern from string and return an instance of the appropriate implementation class.
@ -506,6 +534,34 @@ def parse_pattern(pattern, fallback=FnmatchPattern):
return cls(pattern)
def parse_exclude_pattern(pattern, fallback=FnmatchPattern):
    """Parse *pattern* and wrap the result as an exclude rule.

    The string is handed to parse_pattern together with *fallback*; the
    resulting pattern object is returned inside an InclExclPattern whose
    ptype is False.
    """
    return InclExclPattern(parse_pattern(pattern, fallback), False)
def parse_inclexcl_pattern(pattern, fallback=ShellPattern):
    """Read pattern from string and return a InclExclPattern object.

    The first character selects the entry type: '-' (exclude, ptype False),
    '+' (include, ptype True) or 'R'/'r' (root path, ptype RootPath).
    Whitespace between the type character and the remaining text is dropped.

    For root paths the remaining text is stored as a plain string; otherwise
    it is parsed with parse_pattern using *fallback*.

    Raises argparse.ArgumentTypeError if the type character is missing or
    unknown, or if nothing is left after the type character.
    """
    type_prefix_map = {
        '-': False,
        '+': True,
        'R': RootPath,
        # NOTE(review): lowercase 'r' is also the first character of an
        # unprefixed 're:' style selector - confirm this overlap is intended.
        'r': RootPath,
    }
    ptype = None
    rest = pattern
    # A lone type character carries no pattern text, hence the length check.
    if len(pattern) > 1 and pattern[0] in type_prefix_map:
        ptype = type_prefix_map[pattern[0]]
        rest = pattern[1:].lstrip()
    if ptype is None or not rest:
        # Report the text exactly as given; the stripped remainder may be empty
        # and would hide the offending input.
        raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern))
    if ptype is RootPath:
        pobj = rest
    else:
        pobj = parse_pattern(rest, fallback)
    return InclExclPattern(pobj, ptype)
def timestamp(s):
"""Convert a --timestamp=s argument to a datetime object"""
try:

View file

@ -9,12 +9,13 @@ import sys
import msgpack
import msgpack.fallback
import time
import argparse
from ..helpers import Location, format_file_size, format_timedelta, format_line, PlaceholderError, make_path_safe, \
prune_within, prune_split, get_cache_dir, get_keys_dir, get_security_dir, Statistics, is_slow_msgpack, \
yes, TRUISH, FALSISH, DEFAULTISH, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, load_patterns, parse_pattern, \
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, \
Buffer
from . import BaseTestCase, FakeInputs
@ -424,7 +425,7 @@ def test_invalid_unicode_pattern(pattern):
(["pp:aaabbb"], None),
(["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
])
def test_patterns_from_file(tmpdir, lines, expected):
def test_exclude_patterns_from_file(tmpdir, lines, expected):
files = [
'/data/something00.txt', '/more/data', '/home',
' #/wsfoobar',
@ -434,7 +435,7 @@ def test_patterns_from_file(tmpdir, lines, expected):
def evaluate(filename):
matcher = PatternMatcher(fallback=True)
matcher.add(load_excludes(open(filename, "rt")), False)
matcher.add_inclexcl(load_excludes(open(filename, "rt")))
return [path for path in files if matcher.match(path)]
exclfile = tmpdir.join("exclude.txt")
@ -445,6 +446,93 @@ def test_patterns_from_file(tmpdir, lines, expected):
assert evaluate(str(exclfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
    # no lines or comments only: neither roots nor patterns
    ([], [], 0),
    (["# Comment only"], [], 0),
    (["- *"], [], 1),
    (["+fm:*/something00.txt",
      "-/data"], [], 2),
    (["R /"], ["/"], 0),
    (["R /",
      "# comment"], ["/"], 0),
    (["# comment",
      "- /data",
      "R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
    """load_patterns must split a pattern file into roots and include/exclude patterns."""
    def evaluate(filename):
        # Use a context manager so the handle is closed even if parsing fails.
        with open(filename, "rt") as fh:
            roots, inclexclpatterns = load_patterns(fh)
        return roots, len(inclexclpatterns)

    patternfile = tmpdir.join("exclude.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    roots, numpatterns = evaluate(str(patternfile))
    assert roots == expected_roots
    assert numpatterns == expected_numpatterns
@pytest.mark.parametrize("lines", [
    (["X /data"]),  # illegal pattern type prefix
    (["/data"]),  # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
    """load_patterns must reject lines without a valid pattern type prefix."""
    patternfile = tmpdir.join("exclude.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))
    filename = str(patternfile)
    with open(filename, "rt") as fh:
        # Only the call under test lives inside the raises block.
        with pytest.raises(argparse.ArgumentTypeError):
            load_patterns(fh)
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["- *"], []),
    # default match type is sh: for patterns -> * doesn't match a /
    (["-*/something0?.txt"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
      '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something00.txt"],
     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something0?.txt"],
     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+/*/something0?.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:*/something00.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:/home/leo",
      "-/home/"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
    """Patterns loaded from a file must select exactly the expected paths."""
    files = [
        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
        '/home', '/home/leo', '/home/leo/t', '/home/other'
    ]

    def evaluate(filename):
        matcher = PatternMatcher(fallback=True)
        # Close the handle deterministically; roots are irrelevant for this test.
        with open(filename, "rt") as fh:
            _roots, inclexclpatterns = load_patterns(fh)
        matcher.add_inclexcl(inclexclpatterns)
        return [path for path in files if matcher.match(path)]

    patternfile = tmpdir.join("exclude.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(patternfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("pattern, cls", [
("", FnmatchPattern),