cleanup: drop leftover pack_id==chunk_id references after sha256 naming, refs #8572

Remove the dead BORG_TESTONLY_SHA256_PACK_ID tox env and CI job, fix the packs.rst pack-id docs, and reword comments/tests to describe sha256 pack naming instead of the removed shortcut.
This commit is contained in:
Mrityunjay Raj 2026-06-18 19:32:59 +05:30
parent fd17753561
commit 0ece418991
5 changed files with 11 additions and 74 deletions

View file

@ -703,57 +703,3 @@ jobs:
report_type: coverage
env_vars: OS,python
files: coverage.xml
sha256_pack_id_tests:
name: sha256 pack-id (informational)
needs: [lint]
runs-on: ubuntu-24.04
timeout-minutes: 90
continue-on-error: true
concurrency:
group: sha256-pack-id-${{ github.head_ref || github.ref }}
cancel-in-progress: false
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
fetch-tags: true
- name: Set up Python 3.12
uses: actions/setup-python@v6
with:
python-version: "3.12"
- name: Cache pip
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-${{ runner.arch }}-pip-sha256-pack-id-${{ hashFiles('requirements.d/development.lock.txt') }}
restore-keys: |
${{ runner.os }}-${{ runner.arch }}-pip-
- name: Cache tox environments
uses: actions/cache@v5
with:
path: .tox
key: ${{ runner.os }}-${{ runner.arch }}-tox-sha256-pack-id-${{ hashFiles('requirements.d/development.lock.txt', 'pyproject.toml') }}
restore-keys: |
${{ runner.os }}-${{ runner.arch }}-tox-sha256-pack-id-
- name: Install Linux packages
run: |
sudo apt-get update
sudo apt-get install -y pkg-config build-essential
sudo apt-get install -y libssl-dev libacl1-dev liblz4-dev
- name: Install Python requirements
run: |
python -m pip install --upgrade pip setuptools wheel
pip install -r requirements.d/development.lock.txt
- name: Install borgbackup
run: pip install -ve ".[cockpit,s3,sftp,rclone]"
- name: Run tests with sha256 pack-ids
run: tox -e sha256-pack-id

View file

@ -71,12 +71,13 @@ Blobs follow one another contiguously with no padding::
Pack ID
~~~~~~~
The pack ID equals the ``chunk_id`` of the blob it contains::
The pack ID is the SHA-256 of the pack file's bytes::
pack_id = chunk_id
pack_id = sha256(pack_bytes)
Since ``chunk_id`` is the ID hash of the plaintext, the filename commits to the
content. ``borg check`` can detect silent corruption without decrypting any blob.
Content-addressing the file by its own bytes makes the name commit to the
content, so borgstore can verify and cache it and ``borg check`` can detect
silent corruption of the stored file.
Namespace
~~~~~~~~~

View file

@ -257,11 +257,6 @@ extras = ["pyfuse3", "sftp", "s3", "rclone"]
set_env = {BORG_FUSE_IMPL = "mfusepy"}
extras = ["mfusepy", "sftp", "s3", "rclone"]
# Informational env: forces sha256 pack_ids even with max_count=1 to expose
# code that still assumes pack_id == chunk_id. Run: tox -e sha256-pack-id
[tool.tox.env.sha256-pack-id]
set_env = {BORG_TESTONLY_SHA256_PACK_ID = "1"}
[tool.tox.env.ruff]
skip_install = true
deps = ["ruff"]

View file

@ -115,9 +115,8 @@ class PackWriter:
uses that repository's single, authoritative index (see the chunks property), so
there is never a second copy to keep in sync. Unit tests pass an explicit index.
At max_count=1 (N=1 phase) each put() maps exactly one chunk to one pack.
Raising max_count later (N>1 phase) enables real packing without touching
this class's interface.
max_count bounds how many chunks a pack accumulates before flush() writes it.
Raising it produces larger packs without changing this class's interface.
"""
def __init__(self, store, *, max_count=1, chunks=None, repository=None):
@ -170,9 +169,8 @@ class PackWriter:
# that incremental string concatenation would cause in Python).
pack_data = b"".join(cdata for _, cdata in self._pieces)
# Name the pack by the hash of its bytes (content-addressing), independent of how many
# chunks it holds or what their ids are. This is why a single-chunk pack's name is not its
# chunk_id: the pack and the chunk are different objects with different identities.
# Name the pack by the SHA-256 of its bytes: the name commits to the stored content,
# so borgstore can verify and cache the file.
pack_id = sha256(pack_data).digest()
# Record (chunk_id, pack_id, obj_offset, obj_size) for every piece.
@ -659,9 +657,8 @@ class Repository:
# add all existing objects to the index.
# borg check: the index may have corrupted objects (we did not delete them)
# borg check --repair: the index will only have non-corrupted objects.
# the pack file name is the pack_id (sha256(pack_bytes)), which is not the
# chunk_id, so recover each object's real (chunk_id, offset, size) from its
# on-disk header rather than assuming pack file name == chunk_id.
# the pack file name is the pack_id; each object's chunk_id, offset and size
# come from its on-disk header, so scan the headers to rebuild the index.
pack_id = hex_to_bin(info.name)
for chunk_id, obj_offset, obj_size in RepoObj.iter_object_headers(pack):
chunks[chunk_id] = ChunkIndexEntry(

View file

@ -223,7 +223,6 @@ def test_pack_writer_n1_flush():
stored_id, pack_id, obj_offset, obj_size = results[0]
assert stored_id == chunk_id
assert pack_id == sha256(cdata).digest()
assert pack_id != chunk_id
assert obj_offset == 0
assert obj_size == len(cdata)
@ -343,7 +342,6 @@ def test_put_marks_id_in_chunk_index(tmp_path):
entry = repository._chunks.get(id1)
assert entry is not None
assert entry.pack_id == sha256(fchunk(b"ZEROS")).digest()
assert entry.pack_id != id1
assert entry.size == 0 # uncompressed size filled in by cache layer