From 2aef3a8187655d6077e0a9095010f7049d47b148 Mon Sep 17 00:00:00 2001 From: shreyash bhosale Date: Wed, 28 Jan 2026 12:31:17 +0530 Subject: [PATCH 1/5] Skip download if collection artifact exists --- lib/ansible/galaxy/collection/__init__.py | 23 ++++++++-- .../collection/concrete_artifact_manager.py | 31 ++++++++++++- test/units/galaxy/test_collection.py | 46 +++++++++++++++++++ 3 files changed, 95 insertions(+), 5 deletions(-) diff --git a/lib/ansible/galaxy/collection/__init__.py b/lib/ansible/galaxy/collection/__init__.py index 433af9f5efa..80260dcc520 100644 --- a/lib/ansible/galaxy/collection/__init__.py +++ b/lib/ansible/galaxy/collection/__init__.py @@ -554,6 +554,20 @@ def download_collections( ) continue + b_dest_path = None + b_expected_basename = artifacts_manager.get_expected_artifact_basename(concrete_coll_pin) + if b_expected_basename is not None: + b_dest_path = os.path.join(b_output_path, b_expected_basename) + if artifacts_manager._artifact_exists(b_dest_path): + display.display( + "Skipping download; file already exists: %s" % to_text(b_dest_path) + ) + requirements.append({ + 'name': to_native(os.path.basename(b_dest_path)), + 'version': concrete_coll_pin.ver, + }) + continue + display.display( u"Downloading collection '{coll!s}' to '{path!s}'". format(coll=to_text(concrete_coll_pin), path=to_text(b_output_path)), @@ -561,10 +575,11 @@ def download_collections( b_src_path = artifacts_manager.get_artifact_path_from_unknown(concrete_coll_pin) - b_dest_path = os.path.join( - b_output_path, - os.path.basename(b_src_path), - ) + if b_dest_path is None: + b_dest_path = os.path.join( + b_output_path, + os.path.basename(b_src_path), + ) if concrete_coll_pin.is_dir: b_dest_path = to_bytes( diff --git a/lib/ansible/galaxy/collection/concrete_artifact_manager.py b/lib/ansible/galaxy/collection/concrete_artifact_manager.py index 1659bc46b49..5c4e47707cc 100644 --- a/lib/ansible/galaxy/collection/concrete_artifact_manager.py +++ b/lib/ansible/galaxy/collection/concrete_artifact_manager.py @@ -15,7 +15,7 @@ import yaml from contextlib import contextmanager from hashlib import sha256 from urllib.error import URLError -from urllib.parse import urldefrag +from urllib.parse import urldefrag, urlsplit from shutil import rmtree from tempfile import mkdtemp @@ -126,6 +126,35 @@ class ConcreteArtifactsManager: "signatures": signatures, } + @staticmethod + def _artifact_exists(b_artifact_path): + # type: (bytes) -> bool + return os.path.isfile(b_artifact_path) + + @staticmethod + def _get_url_basename(url): + # type: (str) -> t.Optional[bytes] + url_path = urlsplit(url).path + basename = os.path.basename(url_path) + if not basename: + return None + return to_bytes(basename, errors='surrogate_or_strict') + + def get_expected_artifact_basename(self, collection): + # type: (t.Union[Candidate, Requirement]) -> t.Optional[bytes] + if collection.is_dir or collection.is_scm or collection.is_subdirs: + return None + if collection.is_url: + return self._get_url_basename(collection.src) + if collection.is_file: + return os.path.basename(to_bytes(collection.src, errors='surrogate_or_strict')) + + try: + url = self._galaxy_collection_cache[collection][0] + except KeyError: + return None + return self._get_url_basename(url) + def get_galaxy_artifact_path(self, collection): # type: (t.Union[Candidate, Requirement]) -> bytes """Given a Galaxy-stored collection, return a cached path. diff --git a/test/units/galaxy/test_collection.py b/test/units/galaxy/test_collection.py index 8da860da284..3d33ecd9711 100644 --- a/test/units/galaxy/test_collection.py +++ b/test/units/galaxy/test_collection.py @@ -22,6 +22,7 @@ from ansible.cli.galaxy import GalaxyCLI from ansible.config import manager from ansible.errors import AnsibleError from ansible.galaxy import api, collection, token +from ansible.galaxy.dependency_resolution.dataclasses import Candidate from ansible.module_utils.common.sentinel import Sentinel from ansible.module_utils.common.text.converters import to_bytes, to_native, to_text from ansible.module_utils.common.file import S_IRWU_RG_RO @@ -927,6 +928,51 @@ def test_download_file_hash_mismatch(tmp_path_factory, monkeypatch): collection._download_file('http://google.com/file', temp_dir, 'bad', True) +def test_download_collections_skips_existing_artifact(tmp_path_factory, monkeypatch): + output_dir = tmp_path_factory.mktemp('download-output') + temp_dir = tmp_path_factory.mktemp('download-temp') + b_output_dir = to_bytes(output_dir) + + artifacts_manager = collection.concrete_artifact_manager.ConcreteArtifactsManager( + to_bytes(temp_dir), + validate_certs=False, + ) + + url = 'https://galaxy.example.com/downloads/ansible_namespace-collection-1.2.3.tar.gz' + candidate = Candidate('ansible_namespace.collection', '1.2.3', 'https://galaxy.example.com', 'galaxy', None) + artifacts_manager.save_collection_source( + candidate, + url, + '', + token.NoTokenSentinel, + 'https://galaxy.example.com/api/v3/collections/ansible_namespace/collection/versions/1.2.3', + [], + ) + + b_existing_path = os.path.join(b_output_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz')) + with open(b_existing_path, 'wb') as existing_file: + existing_file.write(b'\x00\x01') + + mock_display = MagicMock() + monkeypatch.setattr(Display, 'display', mock_display) + + mock_resolve = MagicMock() + mock_resolve.return_value = {candidate.fqcn: candidate} + monkeypatch.setattr(collection, '_resolve_depenency_map', mock_resolve) + + mock_get_artifact = MagicMock() + monkeypatch.setattr(artifacts_manager, 'get_artifact_path_from_unknown', mock_get_artifact) + + collection.download_collections([], to_text(output_dir), [], False, False, artifacts_manager) + + skip_messages = [ + call[1][0] for call in mock_display.mock_calls + if call[1] and isinstance(call[1][0], str) and 'Skipping download; file already exists:' in call[1][0] + ] + assert len(skip_messages) == 1 + assert skip_messages[0].endswith(to_text(b_existing_path)) + assert mock_get_artifact.call_count == 0 + def test_extract_tar_file_invalid_hash(tmp_tarfile): temp_dir, tfile, filename, dummy = tmp_tarfile From 0ed14c2b3d1af9aea64aea04ac5f7769e96362dd Mon Sep 17 00:00:00 2001 From: shreyash bhosale Date: Wed, 28 Jan 2026 13:01:19 +0530 Subject: [PATCH 2/5] Skip download if collection artifact exists --- .../skip-download-if-artifact-exists.yml | 2 + lib/ansible/cli/galaxy.py | 4 +- lib/ansible/galaxy/collection/__init__.py | 21 +-- .../collection/concrete_artifact_manager.py | 30 ++++ test/units/galaxy/test_collection.py | 130 ++++++++++++++++-- 5 files changed, 170 insertions(+), 17 deletions(-) create mode 100644 changelogs/fragments/skip-download-if-artifact-exists.yml diff --git a/changelogs/fragments/skip-download-if-artifact-exists.yml b/changelogs/fragments/skip-download-if-artifact-exists.yml new file mode 100644 index 00000000000..79109240eb8 --- /dev/null +++ b/changelogs/fragments/skip-download-if-artifact-exists.yml @@ -0,0 +1,2 @@ +minor_changes: + - ansible-galaxy - Skip re-downloading collection artifacts when a matching file already exists unless ``--force`` is used. diff --git a/lib/ansible/cli/galaxy.py b/lib/ansible/cli/galaxy.py index fa418a32bd3..cdc26ce4dda 100755 --- a/lib/ansible/cli/galaxy.py +++ b/lib/ansible/cli/galaxy.py @@ -288,7 +288,7 @@ class GalaxyCLI(CLI): collection.set_defaults(func=self.execute_collection) # to satisfy doc build collection_parser = collection.add_subparsers(metavar='COLLECTION_ACTION', dest='action') collection_parser.required = True - self.add_download_options(collection_parser, parents=[common, cache_options]) + self.add_download_options(collection_parser, parents=[common, cache_options, force]) self.add_init_options(collection_parser, parents=[common, force]) self.add_build_options(collection_parser, parents=[common, force]) self.add_publish_options(collection_parser, parents=[common]) @@ -1033,6 +1033,7 @@ class GalaxyCLI(CLI): collections = context.CLIARGS['args'] no_deps = context.CLIARGS['no_deps'] download_path = context.CLIARGS['download_path'] + force = context.CLIARGS['force'] requirements_file = context.CLIARGS['requirements'] if requirements_file: @@ -1051,6 +1052,7 @@ class GalaxyCLI(CLI): download_collections( requirements, download_path, self.api_servers, no_deps, context.CLIARGS['allow_pre_release'], + force=force, artifacts_manager=artifacts_manager, ) diff --git a/lib/ansible/galaxy/collection/__init__.py b/lib/ansible/galaxy/collection/__init__.py index 80260dcc520..780a71e120b 100644 --- a/lib/ansible/galaxy/collection/__init__.py +++ b/lib/ansible/galaxy/collection/__init__.py @@ -514,6 +514,7 @@ def download_collections( no_deps, # type: bool allow_pre_release, # type: bool artifacts_manager, # type: ConcreteArtifactsManager + force=False, # type: bool ): # type: (...) -> None """Download Ansible collections as their tarball from a Galaxy server to the path specified and creates a requirements file of the downloaded requirements to be used for an install. @@ -524,6 +525,7 @@ def download_collections( :param validate_certs: Whether to validate the certificate if downloading a tarball from a non-Galaxy host. :param no_deps: Ignore any collection dependencies and only download the base requirements. :param allow_pre_release: Do not ignore pre-release versions when selecting the latest. + :param force: Force re-download of existing artifacts. """ with _display_progress("Process download dependency map"): dep_map = _resolve_depenency_map( @@ -554,19 +556,22 @@ def download_collections( ) continue - b_dest_path = None b_expected_basename = artifacts_manager.get_expected_artifact_basename(concrete_coll_pin) if b_expected_basename is not None: b_dest_path = os.path.join(b_output_path, b_expected_basename) - if artifacts_manager._artifact_exists(b_dest_path): + if not force and artifacts_manager._artifact_exists(b_dest_path): + if artifacts_manager.validate_existing_artifact(concrete_coll_pin, b_dest_path): + display.display( + "Skipping download; file already exists: %s" % to_text(b_dest_path) + ) + requirements.append({ + 'name': to_native(os.path.basename(b_dest_path)), + 'version': concrete_coll_pin.ver, + }) + continue display.display( - "Skipping download; file already exists: %s" % to_text(b_dest_path) + "Existing artifact did not match requested collection; re-downloading: %s" % to_text(b_dest_path) ) - requirements.append({ - 'name': to_native(os.path.basename(b_dest_path)), - 'version': concrete_coll_pin.ver, - }) - continue display.display( u"Downloading collection '{coll!s}' to '{path!s}'". diff --git a/lib/ansible/galaxy/collection/concrete_artifact_manager.py b/lib/ansible/galaxy/collection/concrete_artifact_manager.py index 5c4e47707cc..893a5f8847d 100644 --- a/lib/ansible/galaxy/collection/concrete_artifact_manager.py +++ b/lib/ansible/galaxy/collection/concrete_artifact_manager.py @@ -39,6 +39,7 @@ from ansible.module_utils.common.sentinel import Sentinel from ansible.module_utils.common.yaml import yaml_load from ansible.module_utils.urls import open_url from ansible.utils.display import Display +from ansible.utils.hashing import secure_hash import ansible.constants as C @@ -155,6 +156,35 @@ class ConcreteArtifactsManager: return None return self._get_url_basename(url) + def get_expected_artifact_hash(self, collection): + # type: (t.Union[Candidate, Requirement]) -> t.Optional[str] + try: + _url, sha256_hash, _token = self._galaxy_collection_cache[collection] + except KeyError: + return None + return sha256_hash or None + + def validate_existing_artifact(self, collection, b_artifact_path): + # type: (t.Union[Candidate, Requirement], bytes) -> bool + try: + collection_meta = _get_meta_from_tar(b_artifact_path) + except AnsibleError: + return False + + if ( + collection_meta.get('namespace') != collection.namespace or + collection_meta.get('name') != collection.name or + collection_meta.get('version') != collection.ver + ): + return False + + expected_hash = self.get_expected_artifact_hash(collection) + if expected_hash: + actual_hash = secure_hash(to_native(b_artifact_path), hash_func=sha256) + return actual_hash == expected_hash + + return True + def get_galaxy_artifact_path(self, collection): # type: (t.Union[Candidate, Requirement]) -> bytes """Given a Galaxy-stored collection, return a cached path. diff --git a/test/units/galaxy/test_collection.py b/test/units/galaxy/test_collection.py index 3d33ecd9711..ce1bad2918c 100644 --- a/test/units/galaxy/test_collection.py +++ b/test/units/galaxy/test_collection.py @@ -29,7 +29,7 @@ from ansible.module_utils.common.file import S_IRWU_RG_RO import builtins from ansible.utils import context_objects as co from ansible.utils.display import Display -from ansible.utils.hashing import secure_hash_s +from ansible.utils.hashing import secure_hash, secure_hash_s @pytest.fixture(autouse=True) @@ -928,7 +928,7 @@ def test_download_file_hash_mismatch(tmp_path_factory, monkeypatch): collection._download_file('http://google.com/file', temp_dir, 'bad', True) -def test_download_collections_skips_existing_artifact(tmp_path_factory, monkeypatch): +def test_download_collections_skips_existing_artifact(tmp_path_factory, monkeypatch, manifest_template): output_dir = tmp_path_factory.mktemp('download-output') temp_dir = tmp_path_factory.mktemp('download-temp') b_output_dir = to_bytes(output_dir) @@ -938,21 +938,27 @@ def test_download_collections_skips_existing_artifact(tmp_path_factory, monkeypa validate_certs=False, ) + manifest_info = manifest_template(namespace='ansible_namespace', name='collection', version='1.2.3') url = 'https://galaxy.example.com/downloads/ansible_namespace-collection-1.2.3.tar.gz' candidate = Candidate('ansible_namespace.collection', '1.2.3', 'https://galaxy.example.com', 'galaxy', None) + + b_existing_path = os.path.join(b_output_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz')) + with tarfile.open(b_existing_path, 'w:gz') as tfile: + b_data = to_bytes(json.dumps(manifest_info)) + tar_info = tarfile.TarInfo('MANIFEST.json') + tar_info.size = len(b_data) + tar_info.mode = S_IRWU_RG_RO + tfile.addfile(tarinfo=tar_info, fileobj=BytesIO(b_data)) + artifacts_manager.save_collection_source( candidate, url, - '', + secure_hash(to_text(b_existing_path), hash_func=sha256), token.NoTokenSentinel, 'https://galaxy.example.com/api/v3/collections/ansible_namespace/collection/versions/1.2.3', [], ) - b_existing_path = os.path.join(b_output_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz')) - with open(b_existing_path, 'wb') as existing_file: - existing_file.write(b'\x00\x01') - mock_display = MagicMock() monkeypatch.setattr(Display, 'display', mock_display) @@ -963,7 +969,11 @@ def test_download_collections_skips_existing_artifact(tmp_path_factory, monkeypa mock_get_artifact = MagicMock() monkeypatch.setattr(artifacts_manager, 'get_artifact_path_from_unknown', mock_get_artifact) - collection.download_collections([], to_text(output_dir), [], False, False, artifacts_manager) + collection.download_collections( + [], to_text(output_dir), [], False, False, + artifacts_manager=artifacts_manager, + force=False, + ) skip_messages = [ call[1][0] for call in mock_display.mock_calls @@ -973,6 +983,110 @@ def test_download_collections_skips_existing_artifact(tmp_path_factory, monkeypa assert skip_messages[0].endswith(to_text(b_existing_path)) assert mock_get_artifact.call_count == 0 + +def test_download_collections_redownloads_on_checksum_mismatch(tmp_path_factory, monkeypatch, manifest_template): + output_dir = tmp_path_factory.mktemp('download-output') + temp_dir = tmp_path_factory.mktemp('download-temp') + b_output_dir = to_bytes(output_dir) + + artifacts_manager = collection.concrete_artifact_manager.ConcreteArtifactsManager( + to_bytes(temp_dir), + validate_certs=False, + ) + + manifest_info = manifest_template(namespace='ansible_namespace', name='collection', version='1.2.3') + url = 'https://galaxy.example.com/downloads/ansible_namespace-collection-1.2.3.tar.gz' + candidate = Candidate('ansible_namespace.collection', '1.2.3', 'https://galaxy.example.com', 'galaxy', None) + + b_existing_path = os.path.join(b_output_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz')) + with tarfile.open(b_existing_path, 'w:gz') as tfile: + b_data = to_bytes(json.dumps(manifest_info)) + tar_info = tarfile.TarInfo('MANIFEST.json') + tar_info.size = len(b_data) + tar_info.mode = S_IRWU_RG_RO + tfile.addfile(tarinfo=tar_info, fileobj=BytesIO(b_data)) + + artifacts_manager.save_collection_source( + candidate, + url, + 'bad', + token.NoTokenSentinel, + 'https://galaxy.example.com/api/v3/collections/ansible_namespace/collection/versions/1.2.3', + [], + ) + + mock_display = MagicMock() + monkeypatch.setattr(Display, 'display', mock_display) + + mock_resolve = MagicMock() + mock_resolve.return_value = {candidate.fqcn: candidate} + monkeypatch.setattr(collection, '_resolve_depenency_map', mock_resolve) + + mock_get_artifact = MagicMock() + mock_get_artifact.return_value = b_existing_path + monkeypatch.setattr(artifacts_manager, 'get_artifact_path_from_unknown', mock_get_artifact) + monkeypatch.setattr(collection.shutil, 'copy', MagicMock()) + + collection.download_collections( + [], to_text(output_dir), [], False, False, + artifacts_manager=artifacts_manager, + force=False, + ) + + assert mock_get_artifact.call_count == 1 + + +def test_download_collections_redownloads_on_manifest_mismatch(tmp_path_factory, monkeypatch, manifest_template): + output_dir = tmp_path_factory.mktemp('download-output') + temp_dir = tmp_path_factory.mktemp('download-temp') + b_output_dir = to_bytes(output_dir) + + artifacts_manager = collection.concrete_artifact_manager.ConcreteArtifactsManager( + to_bytes(temp_dir), + validate_certs=False, + ) + + manifest_info = manifest_template(namespace='ansible_namespace', name='collection', version='9.9.9') + url = 'https://galaxy.example.com/downloads/ansible_namespace-collection-1.2.3.tar.gz' + candidate = Candidate('ansible_namespace.collection', '1.2.3', 'https://galaxy.example.com', 'galaxy', None) + + b_existing_path = os.path.join(b_output_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz')) + with tarfile.open(b_existing_path, 'w:gz') as tfile: + b_data = to_bytes(json.dumps(manifest_info)) + tar_info = tarfile.TarInfo('MANIFEST.json') + tar_info.size = len(b_data) + tar_info.mode = S_IRWU_RG_RO + tfile.addfile(tarinfo=tar_info, fileobj=BytesIO(b_data)) + + artifacts_manager.save_collection_source( + candidate, + url, + secure_hash(to_text(b_existing_path), hash_func=sha256), + token.NoTokenSentinel, + 'https://galaxy.example.com/api/v3/collections/ansible_namespace/collection/versions/1.2.3', + [], + ) + + mock_display = MagicMock() + monkeypatch.setattr(Display, 'display', mock_display) + + mock_resolve = MagicMock() + mock_resolve.return_value = {candidate.fqcn: candidate} + monkeypatch.setattr(collection, '_resolve_depenency_map', mock_resolve) + + mock_get_artifact = MagicMock() + mock_get_artifact.return_value = b_existing_path + monkeypatch.setattr(artifacts_manager, 'get_artifact_path_from_unknown', mock_get_artifact) + monkeypatch.setattr(collection.shutil, 'copy', MagicMock()) + + collection.download_collections( + [], to_text(output_dir), [], False, False, + artifacts_manager=artifacts_manager, + force=False, + ) + + assert mock_get_artifact.call_count == 1 + def test_extract_tar_file_invalid_hash(tmp_tarfile): temp_dir, tfile, filename, dummy = tmp_tarfile From 2ece43b1bdc6c8bd210171762a19c95db84d1a5c Mon Sep 17 00:00:00 2001 From: shreyash bhosale Date: Wed, 28 Jan 2026 14:59:13 +0530 Subject: [PATCH 3/5] Fix pep8 spacing in galaxy tests --- test/units/galaxy/test_collection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/units/galaxy/test_collection.py b/test/units/galaxy/test_collection.py index ce1bad2918c..638aa0d430d 100644 --- a/test/units/galaxy/test_collection.py +++ b/test/units/galaxy/test_collection.py @@ -1087,6 +1087,7 @@ def test_download_collections_redownloads_on_manifest_mismatch(tmp_path_factory, assert mock_get_artifact.call_count == 1 + def test_extract_tar_file_invalid_hash(tmp_tarfile): temp_dir, tfile, filename, dummy = tmp_tarfile From 97ddd479dba6080074020e75e7b20fdc39ccfb40 Mon Sep 17 00:00:00 2001 From: shreyash bhosale Date: Wed, 28 Jan 2026 15:27:58 +0530 Subject: [PATCH 4/5] Fix download path initialization for SCM downloads --- lib/ansible/galaxy/collection/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ansible/galaxy/collection/__init__.py b/lib/ansible/galaxy/collection/__init__.py index 780a71e120b..b62b298e8c2 100644 --- a/lib/ansible/galaxy/collection/__init__.py +++ b/lib/ansible/galaxy/collection/__init__.py @@ -556,6 +556,7 @@ def download_collections( ) continue + b_dest_path = None b_expected_basename = artifacts_manager.get_expected_artifact_basename(concrete_coll_pin) if b_expected_basename is not None: b_dest_path = os.path.join(b_output_path, b_expected_basename) From 7d005d7e94b2d3614ccda3d8801dc1c6f0e546e9 Mon Sep 17 00:00:00 2001 From: shreyash bhosale Date: Wed, 28 Jan 2026 16:54:03 +0530 Subject: [PATCH 5/5] Adjust download integration assertions for skip --- .../ansible-galaxy-collection/tasks/download.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/integration/targets/ansible-galaxy-collection/tasks/download.yml b/test/integration/targets/ansible-galaxy-collection/tasks/download.yml index 8a2fa5652e4..d2cfbe6b7d2 100644 --- a/test/integration/targets/ansible-galaxy-collection/tasks/download.yml +++ b/test/integration/targets/ansible-galaxy-collection/tasks/download.yml @@ -48,9 +48,15 @@ - name: assert download collection with multiple dependencies assert: that: - - '"Downloading collection ''parent_dep.parent_collection:1.0.0'' to" in download_collection.stdout' - - '"Downloading collection ''child_dep.child_collection:0.9.9'' to" in download_collection.stdout' - - '"Downloading collection ''child_dep.child_dep2:1.2.2'' to" in download_collection.stdout' + - >- + "Downloading collection 'parent_dep.parent_collection:1.0.0' to" in download_collection.stdout + or 'parent_dep-parent_collection-1.0.0.tar.gz' in download_collection.stdout + - >- + "Downloading collection 'child_dep.child_collection:0.9.9' to" in download_collection.stdout + or 'child_dep-child_collection-0.9.9.tar.gz' in download_collection.stdout + - >- + "Downloading collection 'child_dep.child_dep2:1.2.2' to" in download_collection.stdout + or 'child_dep-child_dep2-1.2.2.tar.gz' in download_collection.stdout - download_collection_actual.examined == 4 - download_collection_actual.matched == 4 - (download_collection_actual.files[0].path | basename) in ['requirements.yml', 'child_dep-child_dep2-1.2.2.tar.gz', 'child_dep-child_collection-0.9.9.tar.gz', 'parent_dep-parent_collection-1.0.0.tar.gz']