mirror of
https://github.com/borgbackup/borg.git
synced 2026-03-18 00:26:13 -04:00
Fix FUSE low linear read speed on large files, fixes #5067
This patch is a quick fix for an inefficient seek algorithm used in the read() function: 1) on each read call, the chunk list for the given file is regenerated; 2) finding the needed chunk takes a long time for large offsets. For 64 GB files the read speed drops to 16 MB/s, for 128 GB files to 8 MB/s, and so on. After applying this patch the read speed becomes 270 MB/s and no longer depends on the read position inside the file. Co-authored-by: Thomas Waldmann <tw@waldmann-edv.de>
This commit is contained in:
parent
f832aa58cc
commit
f033c7fc97
1 changed file with 36 additions and 2 deletions
|
|
@ -41,6 +41,12 @@ else:
|
|||
llfuse.main(single=True)
|
||||
return None
|
||||
|
||||
# size of some LRUCaches (1 element per simultaneously open file)
|
||||
# note: _inode_cache might have rather large elements - Item.chunks can be large!
|
||||
# also, simultaneously reading too many files should be avoided anyway.
|
||||
# thus, do not set FILES to high values.
|
||||
FILES = 4
|
||||
|
||||
|
||||
class ItemCache:
|
||||
"""
|
||||
|
|
@ -234,6 +240,8 @@ class FuseOperations(llfuse.Operations):
|
|||
# in the archives. For example archive directories or intermediate directories
|
||||
# not contained in archives.
|
||||
self.items = {}
|
||||
# cache up to <FILES> Items
|
||||
self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None)
|
||||
# _inode_count is the current count of synthetic inodes, i.e. those in self.items
|
||||
self._inode_count = 0
|
||||
# Maps inode numbers to the inode number of the parent
|
||||
|
|
@ -249,6 +257,7 @@ class FuseOperations(llfuse.Operations):
|
|||
data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
|
||||
logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
|
||||
self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
|
||||
self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None)
|
||||
|
||||
def _create_filesystem(self):
|
||||
self._create_dir(parent=1) # first call, create root dir (inode == 1)
|
||||
|
|
@ -512,10 +521,17 @@ class FuseOperations(llfuse.Operations):
|
|||
return stat_
|
||||
|
||||
def get_item(self, inode):
    """Return the Item for *inode*, consulting caches before the expensive backend.

    Lookup order:
    1. ``self._inode_cache`` -- small LRU of recently returned Items (cheap hit).
    2. ``self.items`` -- plain dict of synthetic items; a dict lookup is cheap,
       so its results are deliberately not copied into ``_inode_cache``.
    3. ``self.cache`` -- the ItemCache backend; despite its own internal
       caching it has noticeable per-call overhead, so its result is stored
       in ``_inode_cache`` for subsequent calls.
    """
    item = self._inode_cache.get(inode)
    if item is not None:
        return item
    try:
        # this is a cheap get-from-dictionary operation, no need to cache the result.
        return self.items[inode]
    except KeyError:
        # while self.cache does some internal caching, it has still quite some overhead, so we cache the result.
        item = self.cache.get(inode)
        self._inode_cache[inode] = item
        return item
|
||||
|
||||
def _find_inode(self, path, prefix=[]):
|
||||
segments = prefix + path.split(b'/')
|
||||
|
|
@ -603,9 +619,23 @@ class FuseOperations(llfuse.Operations):
|
|||
def read(self, fh, offset, size):
|
||||
parts = []
|
||||
item = self.get_item(fh)
|
||||
for id, s, csize in item.chunks:
|
||||
|
||||
# optimize for linear reads:
|
||||
# we cache the chunk number and the in-file offset of the chunk in _last_pos[fh]
|
||||
chunk_no, chunk_offset = self._last_pos.get(fh, (0, 0))
|
||||
if chunk_offset > offset:
|
||||
# this is not a linear read, so we lost track and need to start from beginning again...
|
||||
chunk_no, chunk_offset = (0, 0)
|
||||
|
||||
offset -= chunk_offset
|
||||
chunks = item.chunks
|
||||
# note: using index iteration to avoid frequently copying big (sub)lists by slicing
|
||||
for idx in range(chunk_no, len(chunks)):
|
||||
id, s, csize = chunks[idx]
|
||||
if s < offset:
|
||||
offset -= s
|
||||
chunk_offset += s
|
||||
chunk_no += 1
|
||||
continue
|
||||
n = min(size, s - offset)
|
||||
if id in self.data_cache:
|
||||
|
|
@ -622,6 +652,10 @@ class FuseOperations(llfuse.Operations):
|
|||
offset = 0
|
||||
size -= n
|
||||
if not size:
|
||||
if fh in self._last_pos:
|
||||
self._last_pos.upd(fh, (chunk_no, chunk_offset))
|
||||
else:
|
||||
self._last_pos[fh] = (chunk_no, chunk_offset)
|
||||
break
|
||||
return b''.join(parts)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue