This commit is contained in:
Shreyash Bhosale 2026-02-03 19:16:43 -06:00 committed by GitHub
commit fa3aa60e81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 261 additions and 10 deletions

View file

@ -0,0 +1,2 @@
minor_changes:
- ansible-galaxy - Skip re-downloading collection artifacts when a matching file already exists unless ``--force`` is used.

View file

@ -288,7 +288,7 @@ class GalaxyCLI(CLI):
collection.set_defaults(func=self.execute_collection) # to satisfy doc build
collection_parser = collection.add_subparsers(metavar='COLLECTION_ACTION', dest='action')
collection_parser.required = True
self.add_download_options(collection_parser, parents=[common, cache_options])
self.add_download_options(collection_parser, parents=[common, cache_options, force])
self.add_init_options(collection_parser, parents=[common, force])
self.add_build_options(collection_parser, parents=[common, force])
self.add_publish_options(collection_parser, parents=[common])
@ -1033,6 +1033,7 @@ class GalaxyCLI(CLI):
collections = context.CLIARGS['args']
no_deps = context.CLIARGS['no_deps']
download_path = context.CLIARGS['download_path']
force = context.CLIARGS['force']
requirements_file = context.CLIARGS['requirements']
if requirements_file:
@ -1051,6 +1052,7 @@ class GalaxyCLI(CLI):
download_collections(
requirements, download_path, self.api_servers, no_deps,
context.CLIARGS['allow_pre_release'],
force=force,
artifacts_manager=artifacts_manager,
)

View file

@ -514,6 +514,7 @@ def download_collections(
no_deps, # type: bool
allow_pre_release, # type: bool
artifacts_manager, # type: ConcreteArtifactsManager
force=False, # type: bool
): # type: (...) -> None
"""Download Ansible collections as their tarball from a Galaxy server to the path specified and creates a requirements
file of the downloaded requirements to be used for an install.
@ -524,6 +525,7 @@ def download_collections(
:param validate_certs: Whether to validate the certificate if downloading a tarball from a non-Galaxy host.
:param no_deps: Ignore any collection dependencies and only download the base requirements.
:param allow_pre_release: Do not ignore pre-release versions when selecting the latest.
:param force: Force re-download of existing artifacts.
"""
with _display_progress("Process download dependency map"):
dep_map = _resolve_depenency_map(
@ -554,6 +556,24 @@ def download_collections(
)
continue
b_dest_path = None
b_expected_basename = artifacts_manager.get_expected_artifact_basename(concrete_coll_pin)
if b_expected_basename is not None:
b_dest_path = os.path.join(b_output_path, b_expected_basename)
if not force and artifacts_manager._artifact_exists(b_dest_path):
if artifacts_manager.validate_existing_artifact(concrete_coll_pin, b_dest_path):
display.display(
"Skipping download; file already exists: %s" % to_text(b_dest_path)
)
requirements.append({
'name': to_native(os.path.basename(b_dest_path)),
'version': concrete_coll_pin.ver,
})
continue
display.display(
"Existing artifact did not match requested collection; re-downloading: %s" % to_text(b_dest_path)
)
display.display(
u"Downloading collection '{coll!s}' to '{path!s}'".
format(coll=to_text(concrete_coll_pin), path=to_text(b_output_path)),
@ -561,10 +581,11 @@ def download_collections(
b_src_path = artifacts_manager.get_artifact_path_from_unknown(concrete_coll_pin)
b_dest_path = os.path.join(
b_output_path,
os.path.basename(b_src_path),
)
if b_dest_path is None:
b_dest_path = os.path.join(
b_output_path,
os.path.basename(b_src_path),
)
if concrete_coll_pin.is_dir:
b_dest_path = to_bytes(

View file

@ -15,7 +15,7 @@ import yaml
from contextlib import contextmanager
from hashlib import sha256
from urllib.error import URLError
from urllib.parse import urldefrag
from urllib.parse import urldefrag, urlsplit
from shutil import rmtree
from tempfile import mkdtemp
@ -39,6 +39,7 @@ from ansible.module_utils.common.sentinel import Sentinel
from ansible.module_utils.common.yaml import yaml_load
from ansible.module_utils.urls import open_url
from ansible.utils.display import Display
from ansible.utils.hashing import secure_hash
import ansible.constants as C
@ -126,6 +127,64 @@ class ConcreteArtifactsManager:
"signatures": signatures,
}
@staticmethod
def _artifact_exists(b_artifact_path):
# type: (bytes) -> bool
return os.path.isfile(b_artifact_path)
@staticmethod
def _get_url_basename(url):
    # type: (str) -> t.Optional[bytes]
    """Derive the artifact file name from the path component of a download URL.

    Returns ``None`` when the URL path yields no final component (e.g. it
    ends with a slash), since no artifact name can be derived from it.
    """
    candidate_name = os.path.basename(urlsplit(url).path)
    if candidate_name:
        return to_bytes(candidate_name, errors='surrogate_or_strict')
    return None
def get_expected_artifact_basename(self, collection):
    # type: (t.Union[Candidate, Requirement]) -> t.Optional[bytes]
    """Predict the on-disk tarball name a download of *collection* would produce.

    Returns ``None`` for source types that never map to a single artifact
    file (directories, SCM checkouts, subdir collections) and for
    Galaxy-served collections with no cached download URL.
    """
    # Directory-backed sources have no artifact file to name.
    if collection.is_dir or collection.is_scm or collection.is_subdirs:
        return None
    if collection.is_url:
        # Direct-URL requirement: the name comes from the URL path.
        return self._get_url_basename(collection.src)
    if collection.is_file:
        # Local tarball requirement: reuse the existing file's name.
        return os.path.basename(to_bytes(collection.src, errors='surrogate_or_strict'))
    # Galaxy-served collection: use the cached download URL, when one is known.
    try:
        download_url = self._galaxy_collection_cache[collection][0]
    except KeyError:
        return None
    return self._get_url_basename(download_url)
def get_expected_artifact_hash(self, collection):
    # type: (t.Union[Candidate, Requirement]) -> t.Optional[str]
    """Return the Galaxy-advertised SHA256 for *collection*, if one is known.

    Yields ``None`` when the collection has no cached download entry, or
    when the cached entry carries an empty/absent hash.
    """
    try:
        cached_entry = self._galaxy_collection_cache[collection]
    except KeyError:
        return None
    # Cached entry layout is (url, sha256_hash, token); empty hash means unknown.
    return cached_entry[1] or None
def validate_existing_artifact(self, collection, b_artifact_path):
    # type: (t.Union[Candidate, Requirement], bytes) -> bool
    """Check whether the tarball at *b_artifact_path* matches *collection*.

    An artifact is considered valid when its MANIFEST.json identifies the
    same namespace/name/version and, where the Galaxy server advertised a
    SHA256 for the download, the file's checksum matches it. Unreadable or
    malformed tarballs are treated as a mismatch rather than an error.
    """
    try:
        meta = _get_meta_from_tar(b_artifact_path)
    except AnsibleError:
        # Corrupt/unreadable artifact: force a re-download instead of failing.
        return False

    expected_identity = (collection.namespace, collection.name, collection.ver)
    found_identity = (meta.get('namespace'), meta.get('name'), meta.get('version'))
    if found_identity != expected_identity:
        return False

    known_hash = self.get_expected_artifact_hash(collection)
    if not known_hash:
        # No server-side checksum to compare against; identity match suffices.
        return True
    return secure_hash(to_native(b_artifact_path), hash_func=sha256) == known_hash
def get_galaxy_artifact_path(self, collection):
# type: (t.Union[Candidate, Requirement]) -> bytes
"""Given a Galaxy-stored collection, return a cached path.

View file

@ -48,9 +48,15 @@
- name: assert download collection with multiple dependencies
assert:
that:
- '"Downloading collection ''parent_dep.parent_collection:1.0.0'' to" in download_collection.stdout'
- '"Downloading collection ''child_dep.child_collection:0.9.9'' to" in download_collection.stdout'
- '"Downloading collection ''child_dep.child_dep2:1.2.2'' to" in download_collection.stdout'
- >-
"Downloading collection 'parent_dep.parent_collection:1.0.0' to" in download_collection.stdout
or 'parent_dep-parent_collection-1.0.0.tar.gz' in download_collection.stdout
- >-
"Downloading collection 'child_dep.child_collection:0.9.9' to" in download_collection.stdout
or 'child_dep-child_collection-0.9.9.tar.gz' in download_collection.stdout
- >-
"Downloading collection 'child_dep.child_dep2:1.2.2' to" in download_collection.stdout
or 'child_dep-child_dep2-1.2.2.tar.gz' in download_collection.stdout
- download_collection_actual.examined == 4
- download_collection_actual.matched == 4
- (download_collection_actual.files[0].path | basename) in ['requirements.yml', 'child_dep-child_dep2-1.2.2.tar.gz', 'child_dep-child_collection-0.9.9.tar.gz', 'parent_dep-parent_collection-1.0.0.tar.gz']

View file

@ -22,13 +22,14 @@ from ansible.cli.galaxy import GalaxyCLI
from ansible.config import manager
from ansible.errors import AnsibleError
from ansible.galaxy import api, collection, token
from ansible.galaxy.dependency_resolution.dataclasses import Candidate
from ansible.module_utils.common.sentinel import Sentinel
from ansible.module_utils.common.text.converters import to_bytes, to_native, to_text
from ansible.module_utils.common.file import S_IRWU_RG_RO
import builtins
from ansible.utils import context_objects as co
from ansible.utils.display import Display
from ansible.utils.hashing import secure_hash_s
from ansible.utils.hashing import secure_hash, secure_hash_s
@pytest.fixture(autouse=True)
@ -927,6 +928,166 @@ def test_download_file_hash_mismatch(tmp_path_factory, monkeypatch):
collection._download_file('http://google.com/file', temp_dir, 'bad', True)
def test_download_collections_skips_existing_artifact(tmp_path_factory, monkeypatch, manifest_template):
    """A pre-existing, valid artifact in the download dir must short-circuit the download."""
    out_dir = tmp_path_factory.mktemp('download-output')
    work_dir = tmp_path_factory.mktemp('download-temp')
    b_out_dir = to_bytes(out_dir)

    cam = collection.concrete_artifact_manager.ConcreteArtifactsManager(
        to_bytes(work_dir),
        validate_certs=False,
    )

    coll = Candidate('ansible_namespace.collection', '1.2.3', 'https://galaxy.example.com', 'galaxy', None)
    b_existing = os.path.join(b_out_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz'))

    # Write a tarball whose MANIFEST.json matches the candidate exactly.
    meta = manifest_template(namespace='ansible_namespace', name='collection', version='1.2.3')
    with tarfile.open(b_existing, 'w:gz') as tfile:
        b_payload = to_bytes(json.dumps(meta))
        member = tarfile.TarInfo('MANIFEST.json')
        member.size = len(b_payload)
        member.mode = S_IRWU_RG_RO
        tfile.addfile(tarinfo=member, fileobj=BytesIO(b_payload))

    # Register the candidate's download URL together with the *correct*
    # checksum of the file on disk, so validation succeeds.
    cam.save_collection_source(
        coll,
        'https://galaxy.example.com/downloads/ansible_namespace-collection-1.2.3.tar.gz',
        secure_hash(to_text(b_existing), hash_func=sha256),
        token.NoTokenSentinel,
        'https://galaxy.example.com/api/v3/collections/ansible_namespace/collection/versions/1.2.3',
        [],
    )

    display_mock = MagicMock()
    monkeypatch.setattr(Display, 'display', display_mock)

    resolve_mock = MagicMock(return_value={coll.fqcn: coll})
    monkeypatch.setattr(collection, '_resolve_depenency_map', resolve_mock)

    fetch_mock = MagicMock()
    monkeypatch.setattr(cam, 'get_artifact_path_from_unknown', fetch_mock)

    collection.download_collections(
        [], to_text(out_dir), [], False, False,
        artifacts_manager=cam,
        force=False,
    )

    # Exactly one skip message must be emitted and no artifact fetched.
    skipped = [
        c[1][0] for c in display_mock.mock_calls
        if c[1] and isinstance(c[1][0], str) and 'Skipping download; file already exists:' in c[1][0]
    ]
    assert len(skipped) == 1
    assert skipped[0].endswith(to_text(b_existing))
    assert fetch_mock.call_count == 0
def test_download_collections_redownloads_on_checksum_mismatch(tmp_path_factory, monkeypatch, manifest_template):
    """An existing artifact whose checksum disagrees with the server's must be re-downloaded."""
    output_dir = tmp_path_factory.mktemp('download-output')
    temp_dir = tmp_path_factory.mktemp('download-temp')
    b_output_dir = to_bytes(output_dir)
    artifacts_manager = collection.concrete_artifact_manager.ConcreteArtifactsManager(
        to_bytes(temp_dir),
        validate_certs=False,
    )
    # The on-disk MANIFEST.json matches the candidate's identity exactly ...
    manifest_info = manifest_template(namespace='ansible_namespace', name='collection', version='1.2.3')
    url = 'https://galaxy.example.com/downloads/ansible_namespace-collection-1.2.3.tar.gz'
    candidate = Candidate('ansible_namespace.collection', '1.2.3', 'https://galaxy.example.com', 'galaxy', None)
    b_existing_path = os.path.join(b_output_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz'))
    with tarfile.open(b_existing_path, 'w:gz') as tfile:
        b_data = to_bytes(json.dumps(manifest_info))
        tar_info = tarfile.TarInfo('MANIFEST.json')
        tar_info.size = len(b_data)
        tar_info.mode = S_IRWU_RG_RO
        tfile.addfile(tarinfo=tar_info, fileobj=BytesIO(b_data))
    # ... but the cached server-side checksum ('bad') cannot match the real
    # file hash, so validation of the existing artifact must fail.
    artifacts_manager.save_collection_source(
        candidate,
        url,
        'bad',
        token.NoTokenSentinel,
        'https://galaxy.example.com/api/v3/collections/ansible_namespace/collection/versions/1.2.3',
        [],
    )
    mock_display = MagicMock()
    monkeypatch.setattr(Display, 'display', mock_display)
    mock_resolve = MagicMock()
    mock_resolve.return_value = {candidate.fqcn: candidate}
    monkeypatch.setattr(collection, '_resolve_depenency_map', mock_resolve)
    # The fetch path is mocked so the "download" is observable without I/O.
    mock_get_artifact = MagicMock()
    mock_get_artifact.return_value = b_existing_path
    monkeypatch.setattr(artifacts_manager, 'get_artifact_path_from_unknown', mock_get_artifact)
    # shutil.copy is stubbed out: source and destination are the same file here.
    monkeypatch.setattr(collection.shutil, 'copy', MagicMock())
    collection.download_collections(
        [], to_text(output_dir), [], False, False,
        artifacts_manager=artifacts_manager,
        force=False,
    )
    # A single fetch proves the stale artifact was not skipped.
    assert mock_get_artifact.call_count == 1
def test_download_collections_redownloads_on_manifest_mismatch(tmp_path_factory, monkeypatch, manifest_template):
    """An existing artifact whose MANIFEST identity differs from the candidate must be re-downloaded."""
    output_dir = tmp_path_factory.mktemp('download-output')
    temp_dir = tmp_path_factory.mktemp('download-temp')
    b_output_dir = to_bytes(output_dir)
    artifacts_manager = collection.concrete_artifact_manager.ConcreteArtifactsManager(
        to_bytes(temp_dir),
        validate_certs=False,
    )
    # Deliberate mismatch: the tarball claims version 9.9.9 while the
    # resolved candidate below is pinned to 1.2.3.
    manifest_info = manifest_template(namespace='ansible_namespace', name='collection', version='9.9.9')
    url = 'https://galaxy.example.com/downloads/ansible_namespace-collection-1.2.3.tar.gz'
    candidate = Candidate('ansible_namespace.collection', '1.2.3', 'https://galaxy.example.com', 'galaxy', None)
    b_existing_path = os.path.join(b_output_dir, to_bytes('ansible_namespace-collection-1.2.3.tar.gz'))
    with tarfile.open(b_existing_path, 'w:gz') as tfile:
        b_data = to_bytes(json.dumps(manifest_info))
        tar_info = tarfile.TarInfo('MANIFEST.json')
        tar_info.size = len(b_data)
        tar_info.mode = S_IRWU_RG_RO
        tfile.addfile(tarinfo=tar_info, fileobj=BytesIO(b_data))
    # The checksum is correct for the file on disk, so only the manifest
    # identity check can (and must) trigger the re-download.
    artifacts_manager.save_collection_source(
        candidate,
        url,
        secure_hash(to_text(b_existing_path), hash_func=sha256),
        token.NoTokenSentinel,
        'https://galaxy.example.com/api/v3/collections/ansible_namespace/collection/versions/1.2.3',
        [],
    )
    mock_display = MagicMock()
    monkeypatch.setattr(Display, 'display', mock_display)
    mock_resolve = MagicMock()
    mock_resolve.return_value = {candidate.fqcn: candidate}
    monkeypatch.setattr(collection, '_resolve_depenency_map', mock_resolve)
    # The fetch path is mocked so the "download" is observable without I/O.
    mock_get_artifact = MagicMock()
    mock_get_artifact.return_value = b_existing_path
    monkeypatch.setattr(artifacts_manager, 'get_artifact_path_from_unknown', mock_get_artifact)
    # shutil.copy is stubbed out: source and destination are the same file here.
    monkeypatch.setattr(collection.shutil, 'copy', MagicMock())
    collection.download_collections(
        [], to_text(output_dir), [], False, False,
        artifacts_manager=artifacts_manager,
        force=False,
    )
    # A single fetch proves the mismatched artifact was not skipped.
    assert mock_get_artifact.call_count == 1
def test_extract_tar_file_invalid_hash(tmp_tarfile):
temp_dir, tfile, filename, dummy = tmp_tarfile