Make image archive/save idempotent, using image id and repo tags as keys (#500)

This commit is contained in:
iamjpotts 2022-11-30 16:45:36 -06:00 committed by GitHub
parent c2d84efccb
commit 166d485216
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 562 additions and 6 deletions

View File

@ -0,0 +1,4 @@
minor_changes:
- docker_image - when using ``archive_path``, detect whether changes are necessary based on the image ID (hash). If the existing tar archive matches the source, do nothing.
Previously, each task execution re-created the archive
(https://github.com/ansible-collections/community.docker/pull/500).

View File

@ -0,0 +1,157 @@
# Copyright 2022 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import json
import os
import tarfile
from ansible.module_utils.common.text.converters import to_native
class ImageArchiveManifestSummary(object):
'''
Represents data extracted from a manifest.json found in the tar archive output of the
"docker image save some:tag > some.tar" command.
'''
def __init__(self, image_id, repo_tags):
'''
:param image_id: File name portion of Config entry, e.g. abcde12345 from abcde12345.json
:type image_id: str
:param repo_tags Docker image names, e.g. ["hello-world:latest"]
:type repo_tags: list
'''
self.image_id = image_id
self.repo_tags = repo_tags
class ImageArchiveInvalidException(Exception):
def __init__(self, message, cause):
'''
:param message: Exception message
:type message: str
:param cause: Inner exception that this exception wraps
:type cause: Exception | None
'''
super(ImageArchiveInvalidException, self).__init__(message)
# Python 2 doesn't support causes
self.cause = cause
def api_image_id(archive_image_id):
'''
Accepts an image hash in the format stored in manifest.json, and returns an equivalent identifier
that represents the same image hash, but in the format presented by the Docker Engine API.
:param archive_image_id: plain image hash
:type archive_image_id: str
:returns: Prefixed hash used by REST api
:rtype: str
'''
return 'sha256:%s' % archive_image_id
def archived_image_manifest(archive_path):
'''
Attempts to get Image.Id and image name from metadata stored in the image
archive tar file.
The tar should contain a file "manifest.json" with an array with a single entry,
and the entry should have a Config field with the image ID in its file name, as
well as a RepoTags list, which typically has only one entry.
:raises:
ImageArchiveInvalidException: A file already exists at archive_path, but could not extract an image ID from it.
:param archive_path: Tar file to read
:type archive_path: str
:return: None, if no file at archive_path, or the extracted image ID, which will not have a sha256: prefix.
:rtype: ImageArchiveManifestSummary
'''
try:
# FileNotFoundError does not exist in Python 2
if not os.path.isfile(archive_path):
return None
tf = tarfile.open(archive_path, 'r')
try:
try:
ef = tf.extractfile('manifest.json')
try:
text = ef.read().decode('utf-8')
manifest = json.loads(text)
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to decode and deserialize manifest.json: %s" % to_native(exc),
exc
)
finally:
# In Python 2.6, this does not have __exit__
ef.close()
if len(manifest) != 1:
raise ImageArchiveInvalidException(
"Expected to have one entry in manifest.json but found %s" % len(manifest),
None
)
m0 = manifest[0]
try:
config_file = m0['Config']
except KeyError as exc:
raise ImageArchiveInvalidException(
"Failed to get Config entry from manifest.json: %s" % to_native(exc),
exc
)
# Extracts hash without 'sha256:' prefix
try:
# Strip off .json filename extension, leaving just the hash.
image_id = os.path.splitext(config_file)[0]
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to extract image id from config file name %s: %s" % (config_file, to_native(exc)),
exc
)
try:
repo_tags = m0['RepoTags']
except KeyError as exc:
raise ImageArchiveInvalidException(
"Failed to get RepoTags entry from manifest.json: %s" % to_native(exc),
exc
)
return ImageArchiveManifestSummary(
image_id=image_id,
repo_tags=repo_tags
)
except ImageArchiveInvalidException:
raise
except Exception as exc:
raise ImageArchiveInvalidException(
"Failed to extract manifest.json from tar file %s: %s" % (archive_path, to_native(exc)),
exc
)
finally:
# In Python 2.6, TarFile does not have __exit__
tf.close()
except ImageArchiveInvalidException:
raise
except Exception as exc:
raise ImageArchiveInvalidException("Failed to open tar file %s: %s" % (archive_path, to_native(exc)), exc)

View File

@ -343,6 +343,13 @@ from ansible_collections.community.docker.plugins.module_utils.common_api import
AnsibleDockerClient,
RequestException,
)
from ansible_collections.community.docker.plugins.module_utils.image_archive import (
archived_image_manifest,
api_image_id,
ImageArchiveInvalidException,
)
from ansible_collections.community.docker.plugins.module_utils.util import (
clean_dict_booleans_for_docker_api,
DockerBaseClass,
@ -535,6 +542,42 @@ class ImageManager(DockerBaseClass):
self.results['actions'].append("Removed image %s" % (name))
self.results['image']['state'] = 'Deleted'
@staticmethod
def archived_image_action(failure_logger, archive_path, current_image_name, current_image_id):
'''
If the archive is missing or requires replacement, return an action message.
:param failure_logger: a logging function that accepts one parameter of type str
:type failure_logger: Callable
:param archive_path: Filename to write archive to
:type archive_path: str
:param current_image_name: repo:tag
:type current_image_name: str
:param current_image_id: Hash, including hash type prefix such as "sha256:"
:type current_image_id: str
:returns: Either None, or an Ansible action message.
:rtype: str
'''
def build_msg(reason):
return 'Archived image %s to %s, %s' % (current_image_name, archive_path, reason)
try:
archived = archived_image_manifest(archive_path)
except ImageArchiveInvalidException as exc:
failure_logger('Unable to extract manifest summary from archive: %s' % to_native(exc))
return build_msg('overwriting an unreadable archive file')
if archived is None:
return build_msg('since none present')
elif current_image_id == api_image_id(archived.image_id) and [current_image_name] == archived.repo_tags:
return None
else:
name = ', '.join(archived.repo_tags)
return build_msg('overwriting archive with image %s named %s' % (archived.image_id, name))
def archive_image(self, name, tag):
'''
Archive an image to a .tar file. Called when archive_path is passed.
@ -559,9 +602,17 @@ class ImageManager(DockerBaseClass):
self.log("archive image: image %s not found" % image_name)
return
self.results['actions'].append('Archived image %s to %s' % (image_name, self.archive_path))
self.results['changed'] = True
if not self.check_mode:
# Will have a 'sha256:' prefix
image_id = image['Id']
action = self.archived_image_action(self.client.module.debug, self.archive_path, image_name, image_id)
if action:
self.results['actions'].append(action)
self.results['changed'] = action is not None
if (not self.check_mode) and self.results['changed']:
self.log("Getting archive of image %s" % image_name)
try:
saved_image = self.client._stream_raw_result(
@ -579,8 +630,7 @@ class ImageManager(DockerBaseClass):
except Exception as exc:
self.fail("Error writing image archive %s - %s" % (self.archive_path, to_native(exc)))
if image:
self.results['image'] = image
self.results['image'] = image
def push_image(self, name, tag=None):
'''

View File

@ -7,10 +7,11 @@
set_fact:
iname: "{{ name_prefix ~ '-options' }}"
iname_1: "{{ name_prefix ~ '-options-1' }}"
hello_world_alt: "{{ name_prefix }}-hello-world-alt:v1.2.3-foo"
- name: Registering image name
set_fact:
inames: "{{ inames + [iname, iname_1] }}"
inames: "{{ inames + [iname, iname_1, hello_world_alt] }}"
####################################################################
## build.args ######################################################
@ -235,6 +236,63 @@
source: pull
register: archive_image
- assert:
that:
- archive_image is changed
- name: Copy archive because we will mutate it but other tests need the original
copy:
remote_src: true
src: "{{ remote_tmp_dir }}/image.tar"
dest: "{{ remote_tmp_dir }}/image_mutated.tar"
- name: Archive image again (idempotent)
docker_image:
name: "{{ docker_test_image_hello_world }}"
archive_path: "{{ remote_tmp_dir }}/image_mutated.tar"
source: local
register: archive_image_2
- assert:
that:
- archive_image_2 is not changed
- name: Archive image 3rd time, should overwrite due to different id
docker_image:
name: "{{ docker_test_image_alpine_different }}"
archive_path: "{{ remote_tmp_dir }}/image_mutated.tar"
source: pull
register: archive_image_3
- assert:
that:
- archive_image_3 is changed
- name: Reset archive
copy:
remote_src: true
src: "{{ remote_tmp_dir }}/image.tar"
dest: "{{ remote_tmp_dir }}/image_mutated.tar"
- name: Tag image with different name
docker_image:
name: "{{ docker_test_image_hello_world }}"
repository: "{{ hello_world_alt }}"
source: local
- name: Archive image 4th time, should overwrite due to different name even when ID is same
docker_image:
name: "{{ hello_world_alt }}"
# Tagged as docker_test_image_hello_world but has same hash/id (before this task overwrites it)
archive_path: "{{ remote_tmp_dir }}/image_mutated.tar"
source: local
register: archive_image_4
- assert:
that:
- archive_image_4 is changed
# This is the test that needs the original, non-mutated archive
- name: Archive image by ID
docker_image:
name: "{{ archive_image.image.Id }}"

View File

@ -0,0 +1,94 @@
# Copyright 2022 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import pytest
import tarfile
from ansible_collections.community.docker.plugins.module_utils.image_archive import (
api_image_id,
archived_image_manifest,
ImageArchiveInvalidException
)
from ..test_support.docker_image_archive_stubbing import (
write_imitation_archive,
write_imitation_archive_with_manifest,
write_irrelevant_tar,
)
@pytest.fixture
def tar_file_name(tmpdir):
'''
Return the name of a non-existing tar file in an existing temporary directory.
'''
# Cast to str required by Python 2.x
return str(tmpdir.join('foo.tar'))
@pytest.mark.parametrize('expected, value', [
('sha256:foo', 'foo'),
('sha256:bar', 'bar')
])
def test_api_image_id_from_archive_id(expected, value):
assert api_image_id(value) == expected
def test_archived_image_manifest_extracts(tar_file_name):
expected_id = "abcde12345"
expected_tags = ["foo:latest", "bar:v1"]
write_imitation_archive(tar_file_name, expected_id, expected_tags)
actual = archived_image_manifest(tar_file_name)
assert actual.image_id == expected_id
assert actual.repo_tags == expected_tags
def test_archived_image_manifest_extracts_nothing_when_file_not_present(tar_file_name):
image_id = archived_image_manifest(tar_file_name)
assert image_id is None
def test_archived_image_manifest_raises_when_file_not_a_tar():
try:
archived_image_manifest(__file__)
raise AssertionError()
except ImageArchiveInvalidException as e:
assert isinstance(e.cause, tarfile.ReadError)
assert str(__file__) in str(e)
def test_archived_image_manifest_raises_when_tar_missing_manifest(tar_file_name):
write_irrelevant_tar(tar_file_name)
try:
archived_image_manifest(tar_file_name)
raise AssertionError()
except ImageArchiveInvalidException as e:
assert isinstance(e.cause, KeyError)
assert 'manifest.json' in str(e.cause)
def test_archived_image_manifest_raises_when_manifest_missing_id(tar_file_name):
manifest = [
{
'foo': 'bar'
}
]
write_imitation_archive_with_manifest(tar_file_name, manifest)
try:
archived_image_manifest(tar_file_name)
raise AssertionError()
except ImageArchiveInvalidException as e:
assert isinstance(e.cause, KeyError)
assert 'Config' in str(e.cause)

View File

@ -0,0 +1,117 @@
# Copyright 2022 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import pytest
from ansible_collections.community.docker.plugins.modules.docker_image import ImageManager
from ansible_collections.community.docker.plugins.module_utils.image_archive import (
api_image_id,
ImageArchiveInvalidException
)
from ..test_support.docker_image_archive_stubbing import (
write_imitation_archive,
write_irrelevant_tar,
)
def assert_no_logging(msg):
raise AssertionError('Should not have logged anything but logged %s' % msg)
def capture_logging(messages):
def capture(msg):
messages.append(msg)
return capture
@pytest.fixture
def tar_file_name(tmpdir):
"""
Return the name of a non-existing tar file in an existing temporary directory.
"""
# Cast to str required by Python 2.x
return str(tmpdir.join('foo.tar'))
def test_archived_image_action_when_missing(tar_file_name):
fake_name = 'a:latest'
fake_id = 'a1'
expected = 'Archived image %s to %s, since none present' % (fake_name, tar_file_name)
actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, fake_name, api_image_id(fake_id))
assert actual == expected
def test_archived_image_action_when_current(tar_file_name):
fake_name = 'b:latest'
fake_id = 'b2'
write_imitation_archive(tar_file_name, fake_id, [fake_name])
actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, fake_name, api_image_id(fake_id))
assert actual is None
def test_archived_image_action_when_invalid(tar_file_name):
fake_name = 'c:1.2.3'
fake_id = 'c3'
write_irrelevant_tar(tar_file_name)
expected = 'Archived image %s to %s, overwriting an unreadable archive file' % (fake_name, tar_file_name)
actual_log = []
actual = ImageManager.archived_image_action(
capture_logging(actual_log),
tar_file_name,
fake_name,
api_image_id(fake_id)
)
assert actual == expected
assert len(actual_log) == 1
assert actual_log[0].startswith('Unable to extract manifest summary from archive')
def test_archived_image_action_when_obsolete_by_id(tar_file_name):
fake_name = 'd:0.0.1'
old_id = 'e5'
new_id = 'd4'
write_imitation_archive(tar_file_name, old_id, [fake_name])
expected = 'Archived image %s to %s, overwriting archive with image %s named %s' % (
fake_name, tar_file_name, old_id, fake_name
)
actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, fake_name, api_image_id(new_id))
assert actual == expected
def test_archived_image_action_when_obsolete_by_name(tar_file_name):
old_name = 'hi'
new_name = 'd:0.0.1'
fake_id = 'd4'
write_imitation_archive(tar_file_name, fake_id, [old_name])
expected = 'Archived image %s to %s, overwriting archive with image %s named %s' % (
new_name, tar_file_name, fake_id, old_name
)
actual = ImageManager.archived_image_action(assert_no_logging, tar_file_name, new_name, api_image_id(fake_id))
print('actual : %s', actual)
print('expected : %s', expected)
assert actual == expected

View File

@ -0,0 +1,76 @@
# Copyright 2022 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import json
import tarfile
from tempfile import TemporaryFile
def write_imitation_archive(file_name, image_id, repo_tags):
'''
Write a tar file meeting these requirements:
* Has a file manifest.json
* manifest.json contains a one-element array
* The element has a Config property with "[image_id].json" as the value name
:param file_name: Name of file to create
:type file_name: str
:param image_id: Fake sha256 hash (without the sha256: prefix)
:type image_id: str
:param repo_tags: list of fake image:tag's
:type repo_tags: list
'''
manifest = [
{
'Config': '%s.json' % image_id,
'RepoTags': repo_tags
}
]
write_imitation_archive_with_manifest(file_name, manifest)
def write_imitation_archive_with_manifest(file_name, manifest):
tf = tarfile.open(file_name, 'w')
try:
with TemporaryFile() as f:
f.write(json.dumps(manifest).encode('utf-8'))
ti = tarfile.TarInfo('manifest.json')
ti.size = f.tell()
f.seek(0)
tf.addfile(ti, f)
finally:
# In Python 2.6, this does not have __exit__
tf.close()
def write_irrelevant_tar(file_name):
'''
Create a tar file that does not match the spec for "docker image save" / "docker image load" commands.
:param file_name: Name of tar file to create
:type file_name: str
'''
tf = tarfile.open(file_name, 'w')
try:
with TemporaryFile() as f:
f.write('Hello, world.'.encode('utf-8'))
ti = tarfile.TarInfo('hi.txt')
ti.size = f.tell()
f.seek(0)
tf.addfile(ti, f)
finally:
tf.close()