docker_container: allow to wait for a container to become healthy (#921)

* Allow to wait for a container to become healthy.

* Improve wording.

Co-authored-by: Don Naro <dnaro@redhat.com>

* Improve explanation.

---------

Co-authored-by: Don Naro <dnaro@redhat.com>
This commit is contained in:
Felix Fontein 2024-07-09 20:07:00 +02:00 committed by GitHub
parent ec37166a6c
commit 4b7e74b75e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 117 additions and 12 deletions

View File

@ -0,0 +1,4 @@
minor_changes:
- "docker_container - the new ``state=healthy`` allows waiting for a container to become healthy on startup.
The ``healthy_wait_timeout`` option configures the maximum time to wait for this to happen
(https://github.com/ansible-collections/community.docker/issues/890, https://github.com/ansible-collections/community.docker/pull/921)."

View File

@ -85,6 +85,9 @@ class ContainerManager(DockerBaseClass):
self.param_pull_check_mode_behavior = self.module.params['pull_check_mode_behavior']
self.param_recreate = self.module.params['recreate']
self.param_removal_wait_timeout = self.module.params['removal_wait_timeout']
self.param_healthy_wait_timeout = self.module.params['healthy_wait_timeout']
if self.param_healthy_wait_timeout <= 0:
self.param_healthy_wait_timeout = None
self.param_restart = self.module.params['restart']
self.param_state = self.module.params['state']
self._parse_comparisons()
@ -212,7 +215,7 @@ class ContainerManager(DockerBaseClass):
self.client.fail(*args, **kwargs)
def run(self):
if self.param_state in ('stopped', 'started', 'present'):
if self.param_state in ('stopped', 'started', 'present', 'healthy'):
self.present(self.param_state)
elif self.param_state == 'absent':
self.absent()
@ -227,7 +230,7 @@ class ContainerManager(DockerBaseClass):
if self.facts:
self.results['container'] = self.facts
def wait_for_state(self, container_id, complete_states=None, wait_states=None, accept_removal=False, max_wait=None):
def wait_for_state(self, container_id, complete_states=None, wait_states=None, accept_removal=False, max_wait=None, health_state=False):
delay = 1.0
total_wait = 0
while True:
@ -235,21 +238,24 @@ class ContainerManager(DockerBaseClass):
result = self.engine_driver.inspect_container_by_id(self.client, container_id)
if result is None:
if accept_removal:
return
return result
msg = 'Encontered vanished container while waiting for container "{0}"'
self.fail(msg.format(container_id))
# Check container state
state = result.get('State', {}).get('Status')
state_info = result.get('State') or {}
if health_state:
state_info = state_info.get('Health') or {}
state = state_info.get('Status')
if complete_states is not None and state in complete_states:
return
return result
if wait_states is not None and state not in wait_states:
msg = 'Encontered unexpected state "{1}" while waiting for container "{0}"'
self.fail(msg.format(container_id, state))
self.fail(msg.format(container_id, state), container=result)
# Wait
if max_wait is not None:
if total_wait > max_wait or delay < 1E-4:
msg = 'Timeout of {1} seconds exceeded while waiting for container "{0}"'
self.fail(msg.format(container_id, max_wait))
self.fail(msg.format(container_id, max_wait), container=result)
if total_wait + delay > max_wait:
delay = max_wait - total_wait
sleep(delay)
@ -368,10 +374,10 @@ class ContainerManager(DockerBaseClass):
container = self.update_limits(container, container_image, comparison_image, host_info)
container = self.update_networks(container, container_created)
if state == 'started' and not container.running:
if state in ('started', 'healthy') and not container.running:
self.diff_tracker.add('running', parameter=True, active=was_running)
container = self.container_start(container.id)
elif state == 'started' and self.param_restart:
elif state in ('started', 'healthy') and self.param_restart:
self.diff_tracker.add('running', parameter=True, active=was_running)
self.diff_tracker.add('restarted', parameter=True, active=False)
container = self.container_restart(container.id)
@ -380,7 +386,7 @@ class ContainerManager(DockerBaseClass):
self.container_stop(container.id)
container = self._get_container(container.id)
if state == 'started' and self.param_paused is not None and container.paused != self.param_paused:
if state in ('started', 'healthy') and self.param_paused is not None and container.paused != self.param_paused:
self.diff_tracker.add('paused', parameter=self.param_paused, active=was_paused)
if not self.check_mode:
try:
@ -398,6 +404,19 @@ class ContainerManager(DockerBaseClass):
self.facts = container.raw
if state == 'healthy' and not self.check_mode:
# `None` means that no health check enabled; simply treat this as 'healthy'
inspect_result = self.wait_for_state(
container.id,
wait_states=['starting', 'unhealthy'],
complete_states=['healthy', None],
max_wait=self.param_healthy_wait_timeout,
health_state=True,
)
if inspect_result:
# Return the latest inspection results retrieved
self.facts = inspect_result
def absent(self):
container = self._get_container(self.param_name)
if container.exists:
@ -878,10 +897,11 @@ def run_module(engine_driver):
recreate=dict(type='bool', default=False),
removal_wait_timeout=dict(type='float'),
restart=dict(type='bool', default=False),
state=dict(type='str', default='started', choices=['absent', 'present', 'started', 'stopped']),
state=dict(type='str', default='started', choices=['absent', 'present', 'healthy', 'started', 'stopped']),
healthy_wait_timeout=dict(type='float', default=300),
),
required_if=[
('state', 'present', ['image'])
('state', 'present', ['image']),
],
)

View File

@ -381,6 +381,7 @@ options:
- "O(healthcheck.interval), O(healthcheck.timeout), O(healthcheck.start_period), and O(healthcheck.start_interval) are specified as durations.
They accept a duration as a string in a format that looks like: V(5h34m56s), V(1m30s), and so on.
The supported units are V(us), V(ms), V(s), V(m) and V(h)."
- See also O(state=healthy).
type: dict
suboptions:
test:
@ -919,6 +920,11 @@ options:
with the requested config.'
- 'V(started) - Asserts that the container is first V(present), and then if the container is not running moves it to a running
state. Use O(restart) to force a matching container to be stopped and restarted.'
- V(healthy) - Asserts that the container is V(present) and V(started), and is actually healthy as well.
This means that the conditions defined in O(healthcheck), or in the image's C(HEALTHCHECK) instruction
(L(Docker reference for HEALTHCHECK, https://docs.docker.com/reference/dockerfile/#healthcheck)),
are satisfied. If no health check is enabled for the container, it is treated as healthy.
The time waited can be controlled with O(healthy_wait_timeout). This state has been added in community.docker 3.11.0.
- 'V(stopped) - Asserts that the container is first V(present), and then if the container is running moves it to a stopped
state.'
- "To control what will be taken into account when comparing configuration, see the O(comparisons) option. To avoid that the
@ -932,12 +938,23 @@ options:
choices:
- absent
- present
- healthy
- stopped
- started
stop_signal:
description:
- Override default signal used to stop the container.
type: str
healthy_wait_timeout:
description:
- If O(state=healthy), this option controls how long the module waits for the container to become healthy.
If the container is not healthy when the timeout is exceeded, the module fails.
- The timeout is specified in seconds. The default, V(300), is 5 minutes.
- Set this to 0 or a negative value to wait indefinitely.
Note that depending on the container this can result in the module not terminating.
default: 300
type: float
version_added: 3.11.0
stop_timeout:
description:
- Number of seconds to wait for the container to stop before sending C(SIGKILL).

View File

@ -0,0 +1,64 @@
---
# Copyright (c) Ansible Project
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
- name: Registering container name
set_fact:
cname: "{{ cname_prefix ~ '-hi' }}"
- name: Registering container name
set_fact:
cnames: "{{ cnames + [cname] }}"
- name: Prepare container
docker_container:
name: "{{ cname }}"
image: "{{ docker_test_image_healthcheck }}"
command: '10m'
state: stopped
register: healthy_1
- debug: var=healthy_1.container.State
- name: Start container (not healthy in time)
docker_container:
name: "{{ cname }}"
state: healthy
healthy_wait_timeout: 1
register: healthy_2
ignore_errors: true
- debug: var=healthy_2.container.State
- name: Prepare container
docker_container:
name: "{{ cname }}"
image: "{{ docker_test_image_healthcheck }}"
command: '10m 5s'
state: stopped
force_kill: true
register: healthy_3
- debug: var=healthy_3.container.State
- name: Start container (healthy in time)
docker_container:
name: "{{ cname }}"
state: healthy
healthy_wait_timeout: 10
register: healthy_4
- debug: var=healthy_4.container.State
- name: Cleanup
docker_container:
name: "{{ cname }}"
state: absent
force_kill: true
- assert:
that:
- healthy_2 is failed
- healthy_2.container.State.Health.Status == "starting"
- healthy_2.msg.startswith("Timeout of 1.0 seconds exceeded while waiting for container ")
- healthy_4 is changed
- healthy_4.container.State.Health.Status == "healthy"