cleanup: drop leftover pack_id==chunk_id references after sha256 naming, refs #8572

Remove the dead BORG_TESTONLY_SHA256_PACK_ID tox env and CI job, fix the packs.rst pack-id docs, and reword comments/tests to describe sha256 pack naming instead of the removed shortcut.
2026-06-20 15:09:01 -04:00 · 2026-06-18 19:32:59 +05:30 · 2026-06-18 19:32:59 +05:30 · 0ece418991
commit 0ece418991
parent fd17753561
5 changed files with 11 additions and 74 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -703,57 +703,3 @@ jobs:
          report_type: coverage
          env_vars: OS,python
          files: coverage.xml
-
-  sha256_pack_id_tests:
-    name: sha256 pack-id (informational)
-    needs: [lint]
-    runs-on: ubuntu-24.04
-    timeout-minutes: 90
-    continue-on-error: true
-    concurrency:
-      group: sha256-pack-id-${{ github.head_ref || github.ref }}
-      cancel-in-progress: false
-
-    steps:
-    - uses: actions/checkout@v6
-      with:
-        fetch-depth: 0
-        fetch-tags: true
-
-    - name: Set up Python 3.12
-      uses: actions/setup-python@v6
-      with:
-        python-version: "3.12"
-
-    - name: Cache pip
-      uses: actions/cache@v5
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-${{ runner.arch }}-pip-sha256-pack-id-${{ hashFiles('requirements.d/development.lock.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-${{ runner.arch }}-pip-
-
-    - name: Cache tox environments
-      uses: actions/cache@v5
-      with:
-        path: .tox
-        key: ${{ runner.os }}-${{ runner.arch }}-tox-sha256-pack-id-${{ hashFiles('requirements.d/development.lock.txt', 'pyproject.toml') }}
-        restore-keys: |
-          ${{ runner.os }}-${{ runner.arch }}-tox-sha256-pack-id-
-
-    - name: Install Linux packages
-      run: |
-        sudo apt-get update
-        sudo apt-get install -y pkg-config build-essential
-        sudo apt-get install -y libssl-dev libacl1-dev liblz4-dev
-
-    - name: Install Python requirements
-      run: |
-        python -m pip install --upgrade pip setuptools wheel
-        pip install -r requirements.d/development.lock.txt
-
-    - name: Install borgbackup
-      run: pip install -ve ".[cockpit,s3,sftp,rclone]"
-
-    - name: Run tests with sha256 pack-ids
-      run: tox -e sha256-pack-id
--- a/docs/internals/packs.rst
+++ b/docs/internals/packs.rst
@ -71,12 +71,13 @@ Blobs follow one another contiguously with no padding::
 Pack ID
 ~~~~~~~

-The pack ID equals the ``chunk_id`` of the blob it contains::
+The pack ID is the SHA-256 of the pack file's bytes::

-    pack_id = chunk_id
+    pack_id = sha256(pack_bytes)

-Since ``chunk_id`` is the ID hash of the plaintext, the filename commits to the
-content. ``borg check`` can detect silent corruption without decrypting any blob.
+Content-addressing the file by its own bytes makes the name commit to the
+content, so borgstore can verify and cache it and ``borg check`` can detect
+silent corruption of the stored file.

 Namespace
 ~~~~~~~~~
--- a/pyproject.toml
+++ b/pyproject.toml
@ -257,11 +257,6 @@ extras = ["pyfuse3", "sftp", "s3", "rclone"]
 set_env = {BORG_FUSE_IMPL = "mfusepy"}
 extras = ["mfusepy", "sftp", "s3", "rclone"]

-# Informational env: forces sha256 pack_ids even with max_count=1 to expose
-# code that still assumes pack_id == chunk_id. Run: tox -e sha256-pack-id
-[tool.tox.env.sha256-pack-id]
-set_env = {BORG_TESTONLY_SHA256_PACK_ID = "1"}
-
 [tool.tox.env.ruff]
 skip_install = true
 deps = ["ruff"]
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@ -115,9 +115,8 @@ class PackWriter:
    uses that repository's single, authoritative index (see the chunks property), so
    there is never a second copy to keep in sync.  Unit tests pass an explicit index.

-    At max_count=1 (N=1 phase) each put() maps exactly one chunk to one pack.
-    Raising max_count later (N>1 phase) enables real packing without touching
-    this class's interface.
+    max_count bounds how many chunks a pack accumulates before flush() writes it.
+    Raising it produces larger packs without changing this class's interface.
    """

    def __init__(self, store, *, max_count=1, chunks=None, repository=None):
@ -170,9 +169,8 @@ class PackWriter:
        # that incremental string concatenation would cause in Python).
        pack_data = b"".join(cdata for _, cdata in self._pieces)

-        # Name the pack by the hash of its bytes (content-addressing), independent of how many
-        # chunks it holds or what their ids are. This is why a single-chunk pack's name is not its
-        # chunk_id: the pack and the chunk are different objects with different identities.
+        # Name the pack by the SHA-256 of its bytes: the name commits to the stored content,
+        # so borgstore can verify and cache the file.
        pack_id = sha256(pack_data).digest()

        # Record (chunk_id, pack_id, obj_offset, obj_size) for every piece.
@ -659,9 +657,8 @@ class Repository:
                    # add all existing objects to the index.
                    # borg check: the index may have corrupted objects (we did not delete them)
                    # borg check --repair: the index will only have non-corrupted objects.
-                    # the pack file name is the pack_id (sha256(pack_bytes)), which is not the
-                    # chunk_id, so recover each object's real (chunk_id, offset, size) from its
-                    # on-disk header rather than assuming pack file name == chunk_id.
+                    # the pack file name is the pack_id; each object's chunk_id, offset and size
+                    # come from its on-disk header, so scan the headers to rebuild the index.
                    pack_id = hex_to_bin(info.name)
                    for chunk_id, obj_offset, obj_size in RepoObj.iter_object_headers(pack):
                        chunks[chunk_id] = ChunkIndexEntry(
--- a/src/borg/testsuite/repository_test.py
+++ b/src/borg/testsuite/repository_test.py
@ -223,7 +223,6 @@ def test_pack_writer_n1_flush():
    stored_id, pack_id, obj_offset, obj_size = results[0]
    assert stored_id == chunk_id
    assert pack_id == sha256(cdata).digest()
-    assert pack_id != chunk_id
    assert obj_offset == 0
    assert obj_size == len(cdata)

@ -343,7 +342,6 @@ def test_put_marks_id_in_chunk_index(tmp_path):
        entry = repository._chunks.get(id1)
        assert entry is not None
        assert entry.pack_id == sha256(fchunk(b"ZEROS")).digest()
-        assert entry.pack_id != id1
        assert entry.size == 0  # uncompressed size filled in by cache layer