mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-20 15:09:01 -04:00
cleanup: drop leftover pack_id==chunk_id references after sha256 naming, refs #8572
Remove the dead BORG_TESTONLY_SHA256_PACK_ID tox env and CI job, fix the packs.rst pack-id docs, and reword comments/tests to describe sha256 pack naming instead of the removed shortcut.
This commit is contained in:
parent
fd17753561
commit
0ece418991
5 changed files with 11 additions and 74 deletions
54
.github/workflows/ci.yml
vendored
54
.github/workflows/ci.yml
vendored
|
|
@@ -703,57 +703,3 @@ jobs:
|
|||
report_type: coverage
|
||||
env_vars: OS,python
|
||||
files: coverage.xml
|
||||
|
||||
sha256_pack_id_tests:
|
||||
name: sha256 pack-id (informational)
|
||||
needs: [lint]
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 90
|
||||
continue-on-error: true
|
||||
concurrency:
|
||||
group: sha256-pack-id-${{ github.head_ref || github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
fetch-tags: true
|
||||
|
||||
- name: Set up Python 3.12
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.12"
|
||||
|
||||
- name: Cache pip
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-${{ runner.arch }}-pip-sha256-pack-id-${{ hashFiles('requirements.d/development.lock.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-${{ runner.arch }}-pip-
|
||||
|
||||
- name: Cache tox environments
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: .tox
|
||||
key: ${{ runner.os }}-${{ runner.arch }}-tox-sha256-pack-id-${{ hashFiles('requirements.d/development.lock.txt', 'pyproject.toml') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-${{ runner.arch }}-tox-sha256-pack-id-
|
||||
|
||||
- name: Install Linux packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y pkg-config build-essential
|
||||
sudo apt-get install -y libssl-dev libacl1-dev liblz4-dev
|
||||
|
||||
- name: Install Python requirements
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install -r requirements.d/development.lock.txt
|
||||
|
||||
- name: Install borgbackup
|
||||
run: pip install -ve ".[cockpit,s3,sftp,rclone]"
|
||||
|
||||
- name: Run tests with sha256 pack-ids
|
||||
run: tox -e sha256-pack-id
|
||||
|
|
|
|||
|
|
@@ -71,12 +71,13 @@ Blobs follow one another contiguously with no padding::
|
|||
Pack ID
|
||||
~~~~~~~
|
||||
|
||||
The pack ID equals the ``chunk_id`` of the blob it contains::
|
||||
The pack ID is the SHA-256 of the pack file's bytes::
|
||||
|
||||
pack_id = chunk_id
|
||||
pack_id = sha256(pack_bytes)
|
||||
|
||||
Since ``chunk_id`` is the ID hash of the plaintext, the filename commits to the
|
||||
content. ``borg check`` can detect silent corruption without decrypting any blob.
|
||||
Content-addressing the file by its own bytes makes the name commit to the
|
||||
content, so borgstore can verify and cache it and ``borg check`` can detect
|
||||
silent corruption of the stored file.
|
||||
|
||||
Namespace
|
||||
~~~~~~~~~
|
||||
|
|
|
|||
|
|
@@ -257,11 +257,6 @@ extras = ["pyfuse3", "sftp", "s3", "rclone"]
|
|||
set_env = {BORG_FUSE_IMPL = "mfusepy"}
|
||||
extras = ["mfusepy", "sftp", "s3", "rclone"]
|
||||
|
||||
# Informational env: forces sha256 pack_ids even with max_count=1 to expose
|
||||
# code that still assumes pack_id == chunk_id. Run: tox -e sha256-pack-id
|
||||
[tool.tox.env.sha256-pack-id]
|
||||
set_env = {BORG_TESTONLY_SHA256_PACK_ID = "1"}
|
||||
|
||||
[tool.tox.env.ruff]
|
||||
skip_install = true
|
||||
deps = ["ruff"]
|
||||
|
|
|
|||
|
|
@@ -115,9 +115,8 @@ class PackWriter:
|
|||
uses that repository's single, authoritative index (see the chunks property), so
|
||||
there is never a second copy to keep in sync. Unit tests pass an explicit index.
|
||||
|
||||
At max_count=1 (N=1 phase) each put() maps exactly one chunk to one pack.
|
||||
Raising max_count later (N>1 phase) enables real packing without touching
|
||||
this class's interface.
|
||||
max_count bounds how many chunks a pack accumulates before flush() writes it.
|
||||
Raising it produces larger packs without changing this class's interface.
|
||||
"""
|
||||
|
||||
def __init__(self, store, *, max_count=1, chunks=None, repository=None):
|
||||
|
|
@@ -170,9 +169,8 @@ class PackWriter:
|
|||
# that incremental string concatenation would cause in Python).
|
||||
pack_data = b"".join(cdata for _, cdata in self._pieces)
|
||||
|
||||
# Name the pack by the hash of its bytes (content-addressing), independent of how many
|
||||
# chunks it holds or what their ids are. This is why a single-chunk pack's name is not its
|
||||
# chunk_id: the pack and the chunk are different objects with different identities.
|
||||
# Name the pack by the SHA-256 of its bytes: the name commits to the stored content,
|
||||
# so borgstore can verify and cache the file.
|
||||
pack_id = sha256(pack_data).digest()
|
||||
|
||||
# Record (chunk_id, pack_id, obj_offset, obj_size) for every piece.
|
||||
|
|
@@ -659,9 +657,8 @@ class Repository:
|
|||
# add all existing objects to the index.
|
||||
# borg check: the index may have corrupted objects (we did not delete them)
|
||||
# borg check --repair: the index will only have non-corrupted objects.
|
||||
# the pack file name is the pack_id (sha256(pack_bytes)), which is not the
|
||||
# chunk_id, so recover each object's real (chunk_id, offset, size) from its
|
||||
# on-disk header rather than assuming pack file name == chunk_id.
|
||||
# the pack file name is the pack_id; each object's chunk_id, offset and size
|
||||
# come from its on-disk header, so scan the headers to rebuild the index.
|
||||
pack_id = hex_to_bin(info.name)
|
||||
for chunk_id, obj_offset, obj_size in RepoObj.iter_object_headers(pack):
|
||||
chunks[chunk_id] = ChunkIndexEntry(
|
||||
|
|
|
|||
|
|
@@ -223,7 +223,6 @@ def test_pack_writer_n1_flush():
|
|||
stored_id, pack_id, obj_offset, obj_size = results[0]
|
||||
assert stored_id == chunk_id
|
||||
assert pack_id == sha256(cdata).digest()
|
||||
assert pack_id != chunk_id
|
||||
assert obj_offset == 0
|
||||
assert obj_size == len(cdata)
|
||||
|
||||
|
|
@@ -343,7 +342,6 @@ def test_put_marks_id_in_chunk_index(tmp_path):
|
|||
entry = repository._chunks.get(id1)
|
||||
assert entry is not None
|
||||
assert entry.pack_id == sha256(fchunk(b"ZEROS")).digest()
|
||||
assert entry.pack_id != id1
|
||||
assert entry.size == 0 # uncompressed size filled in by cache layer
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue