#!/usr/bin/python

# Copyright 2016 Red Hat | Ansible
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import annotations


DOCUMENTATION = r"""
module: docker_swarm
short_description: Manage Swarm cluster
description:
  - Create a new Swarm cluster.
  - Add/Remove nodes or managers to an existing cluster.
extends_documentation_fragment:
  - community.docker._docker
  - community.docker._docker.docker_py_2_documentation
  - community.docker._attributes
  - community.docker._attributes.actiongroup_docker

attributes:
  check_mode:
    support: full
  diff_mode:
    support: full
  idempotent:
    support: full

options:
  advertise_addr:
    description:
      - Externally reachable address advertised to other nodes.
      - This can either be an address/port combination in the form V(192.168.1.1:4567), or an interface followed by a port number, like V(eth0:4567).
      - If the port number is omitted, the port number from the listen address is used.
      - If O(advertise_addr) is not specified, it will be automatically detected when possible.
      - Only used when swarm is initialised or joined. Because of this it is not considered for idempotency checking.
    type: str
  default_addr_pool:
    description:
      - Default address pool in CIDR format.
      - Only used when swarm is initialised. Because of this it is not considered for idempotency checking.
      - Requires API version >= 1.39.
    type: list
    elements: str
  subnet_size:
    description:
      - Default address pool subnet mask length.
      - Only used when swarm is initialised. Because of this it is not considered for idempotency checking.
      - Requires API version >= 1.39.
    type: int
  listen_addr:
    description:
      - Listen address used for inter-manager communication.
      - This can either be an address/port combination in the form V(192.168.1.1:4567), or an interface followed by a port number, like V(eth0:4567).
      - If the port number is omitted, the default swarm listening port is used.
      - Only used when swarm is initialised or joined. Because of this it is not considered for idempotency checking.
    type: str
    default: 0.0.0.0:2377
  force:
    description:
      - Use with state V(present) to force creating a new Swarm, even if already part of one.
      - Use with state V(absent) to Leave the swarm even if this node is a manager.
    type: bool
    default: false
  state:
    description:
      - Set to V(present), to create/update a new cluster.
      - Set to V(join), to join an existing cluster.
      - Set to V(absent), to leave an existing cluster.
      - Set to V(remove), to remove an absent node from the cluster. Note that removing requires Docker SDK for Python >= 2.4.0.
      - M(community.docker.docker_node) can be used to demote a manager before removal.
    type: str
    default: present
    choices:
      - present
      - join
      - absent
      - remove
  node_id:
    description:
      - Swarm id of the node to remove.
      - Used with O(state=remove).
    type: str
  join_token:
    description:
      - Swarm token used to join a swarm cluster.
      - Used with O(state=join).
      - If this value is specified, the corresponding value in the return values will be censored by Ansible. This is a side-effect of this value not being logged.
    type: str
  remote_addrs:
    description:
      - Remote address of one or more manager nodes of an existing Swarm to connect to.
      - Used with O(state=join).
    type: list
    elements: str
  task_history_retention_limit:
    description:
      - Maximum number of tasks history stored.
      - Docker default value is V(5).
    type: int
  snapshot_interval:
    description:
      - Number of logs entries between snapshot.
      - Docker default value is V(10000).
    type: int
  keep_old_snapshots:
    description:
      - Number of snapshots to keep beyond the current snapshot.
      - Docker default value is V(0).
    type: int
  log_entries_for_slow_followers:
    description:
      - Number of log entries to keep around to sync up slow followers after a snapshot is created.
    type: int
  heartbeat_tick:
    description:
      - Amount of ticks (in seconds) between each heartbeat.
      - Docker default value is V(1) seconds.
    type: int
  election_tick:
    description:
      - Amount of ticks (in seconds) needed without a leader to trigger a new election.
      - Docker default value is V(10) seconds.
    type: int
  dispatcher_heartbeat_period:
    description:
      - The delay (in nanoseconds) for an agent to send a heartbeat to the dispatcher.
      - Docker default value is 5 seconds, which corresponds to a value of V(5000000000).
    type: int
  node_cert_expiry:
    description:
      - Automatic expiry for nodes certificates, given in nanoseconds.
      - Docker default value is 90 days, which corresponds to a value of V(7776000000000000).
    type: int
  name:
    description:
      - The name of the swarm.
    type: str
  labels:
    description:
      - User-defined key/value metadata.
      - Label operations in this module apply to the docker swarm cluster. Use M(community.docker.docker_node) module to add/modify/remove swarm node labels.
      - Requires API version >= 1.32.
    type: dict
  signing_ca_cert:
    description:
      - The desired signing CA certificate for all swarm node TLS leaf certificates, in PEM format.
      - This must not be a path to a certificate, but the contents of the certificate.
      - Requires API version >= 1.30.
    type: str
  signing_ca_key:
    description:
      - The desired signing CA key for all swarm node TLS leaf certificates, in PEM format.
      - This must not be a path to a key, but the contents of the key.
      - Requires API version >= 1.30.
    type: str
  ca_force_rotate:
    description:
      - An integer whose purpose is to force swarm to generate a new signing CA certificate and key, if none have been specified.
      - Docker default value is V(0).
      - Requires API version >= 1.30.
    type: int
  autolock_managers:
    description:
      - If set, generate a key and use it to lock data stored on the managers.
      - Docker default value is V(false).
      - M(community.docker.docker_swarm_info) can be used to retrieve the unlock key.
    type: bool
  rotate_worker_token:
    description: Rotate the worker join token.
    type: bool
    default: false
  rotate_manager_token:
    description: Rotate the manager join token.
    type: bool
    default: false
  data_path_addr:
    description:
      - Address or interface to use for data path traffic.
      - This can either be an address in the form V(192.168.1.1), or an interface, like V(eth0).
      - Only used when swarm is initialised or joined. Because of this it is not considered for idempotency checking.
      - Requires API version >= 1.30.
    type: str
    version_added: 2.5.0
  data_path_port:
    description:
      - Port to use for data path traffic.
      - This needs to be a port number like V(9789).
      - Only used when swarm is initialised. Because of this it is not considered for idempotency checking.
      - Requires API version >= 1.40.
    type: int
    version_added: 3.1.0
requirements:
  - "L(Docker SDK for Python,https://docker-py.readthedocs.io/en/stable/) >= 2.0.0"
  - Docker API >= 1.25
author:
  - Thierry Bouvet (@tbouvet)
  - Piotr Wojciechowski (@WojciechowskiPiotr)
"""

EXAMPLES = r"""
---
- name: Init a new swarm with default parameters
  community.docker.docker_swarm:
    state: present

- name: Update swarm configuration
  community.docker.docker_swarm:
    state: present
    election_tick: 5

- name: Add nodes
  community.docker.docker_swarm:
    state: join
    advertise_addr: 192.168.1.2
    join_token: SWMTKN-1--xxxxx
    remote_addrs: ['192.168.1.1:2377']

- name: Leave swarm for a node
  community.docker.docker_swarm:
    state: absent

- name: Remove a swarm manager
  community.docker.docker_swarm:
    state: absent
    force: true

- name: Remove node from swarm
  community.docker.docker_swarm:
    state: remove
    node_id: mynode

- name: Init a new swarm with different data path interface
  community.docker.docker_swarm:
    state: present
    advertise_addr: eth0
    data_path_addr: ens10

- name: Init a new swarm with a different data path port
  community.docker.docker_swarm:
    state: present
    data_path_port: 9789
"""

RETURN = r"""
swarm_facts:
  description: Information about swarm.
  returned: success
  type: dict
  contains:
    JoinTokens:
      description: Tokens to connect to the Swarm.
      returned: success
      type: dict
      contains:
        Worker:
          description:
            - Token to join the cluster as a new *worker* node.
            - B(Note:) if this value has been specified as O(join_token), the value here will not be the token, but C(VALUE_SPECIFIED_IN_NO_LOG_PARAMETER). If you pass O(join_token), make sure your playbook/role does not depend on this return value!
          returned: success
          type: str
          example: SWMTKN-1--xxxxx
        Manager:
          description:
            - Token to join the cluster as a new *manager* node.
            - B(Note:) if this value has been specified as O(join_token), the value here will not be the token, but C(VALUE_SPECIFIED_IN_NO_LOG_PARAMETER). If you pass O(join_token), make sure your playbook/role does not depend on this return value!
          returned: success
          type: str
          example: SWMTKN-1--xxxxx
    UnlockKey:
      description: The swarm unlock-key if O(autolock_managers=true).
      returned: on success if O(autolock_managers=true) and swarm is initialised, or if O(autolock_managers) has changed.
      type: str
      example: SWMKEY-1-xxx
actions:
  description: Provides the actions done on the swarm.
  returned: when action failed.
  type: list
  elements: str
  example: ['This cluster is already a swarm cluster']
"""

import json
import traceback
import typing as t


try:
    from docker.errors import APIError, DockerException
except ImportError:
    # missing Docker SDK for Python handled in ansible.module_utils.docker.common
    pass

from ansible_collections.community.docker.plugins.module_utils._common import (
    RequestException,
)
from ansible_collections.community.docker.plugins.module_utils._swarm import (
    AnsibleDockerSwarmClient,
)
from ansible_collections.community.docker.plugins.module_utils._util import (
    DifferenceTracker,
    DockerBaseClass,
    sanitize_labels,
)


class TaskParameters(DockerBaseClass):
    """Holds the swarm-related option values and the swarm spec built from them."""

    def __init__(self) -> None:
        super().__init__()

        self.advertise_addr: str | None = None
        self.listen_addr: str | None = None
        self.remote_addrs: list[str] | None = None
        self.join_token: str | None = None
        self.data_path_addr: str | None = None
        self.data_path_port: int | None = None
        # Populated by update_parameters() with the result of client.create_swarm_spec().
        self.spec = None

        # Spec
        self.snapshot_interval: int | None = None
        self.task_history_retention_limit: int | None = None
        self.keep_old_snapshots: int | None = None
        self.log_entries_for_slow_followers: int | None = None
        self.heartbeat_tick: int | None = None
        self.election_tick: int | None = None
        self.dispatcher_heartbeat_period: int | None = None
        self.node_cert_expiry: int | None = None
        self.name: str | None = None
        self.labels: dict[str, t.Any] | None = None
        self.log_driver = None
        self.signing_ca_cert: str | None = None
        self.signing_ca_key: str | None = None
        self.ca_force_rotate: int | None = None
        self.autolock_managers: bool | None = None
        self.rotate_worker_token: bool | None = None
        self.rotate_manager_token: bool | None = None
        self.default_addr_pool: list[str] | None = None
        self.subnet_size: int | None = None

    @staticmethod
    def from_ansible_params(client: AnsibleDockerSwarmClient) -> TaskParameters:
        """Create an instance from ``client.module.params`` and build its swarm spec.

        Only module parameters whose name matches an existing instance
        attribute are copied; everything else is ignored.
        """
        result = TaskParameters()
        for key, value in client.module.params.items():
            if key in result.__dict__:
                setattr(result, key, value)

        result.update_parameters(client)
        return result

    def update_from_swarm_info(self, swarm_info: dict[str, t.Any]) -> None:
        """Fill every attribute that is still ``None`` from the given swarm info.

        Values that are already set are never overwritten, with the exception
        of ``log_driver``, which is always taken from the swarm info when the
        spec provides one.

        :param swarm_info: Swarm inspection result; must contain a ``Spec`` key.
        """
        spec = swarm_info["Spec"]

        ca_config = spec.get("CAConfig") or {}
        if self.node_cert_expiry is None:
            self.node_cert_expiry = ca_config.get("NodeCertExpiry")
        if self.ca_force_rotate is None:
            self.ca_force_rotate = ca_config.get("ForceRotate")

        dispatcher = spec.get("Dispatcher") or {}
        if self.dispatcher_heartbeat_period is None:
            self.dispatcher_heartbeat_period = dispatcher.get("HeartbeatPeriod")

        raft = spec.get("Raft") or {}
        if self.snapshot_interval is None:
            self.snapshot_interval = raft.get("SnapshotInterval")
        if self.keep_old_snapshots is None:
            self.keep_old_snapshots = raft.get("KeepOldSnapshots")
        if self.heartbeat_tick is None:
            self.heartbeat_tick = raft.get("HeartbeatTick")
        if self.log_entries_for_slow_followers is None:
            self.log_entries_for_slow_followers = raft.get("LogEntriesForSlowFollowers")
        if self.election_tick is None:
            self.election_tick = raft.get("ElectionTick")

        orchestration = spec.get("Orchestration") or {}
        if self.task_history_retention_limit is None:
            self.task_history_retention_limit = orchestration.get(
                "TaskHistoryRetentionLimit"
            )

        encryption_config = spec.get("EncryptionConfig") or {}
        if self.autolock_managers is None:
            self.autolock_managers = encryption_config.get("AutoLockManagers")

        if self.name is None:
            self.name = spec.get("Name")

        if self.labels is None:
            self.labels = spec.get("Labels") or {}

        # Same tolerant lookup as the sections above: a spec without
        # "TaskDefaults" (or with a null one) must not raise KeyError/TypeError.
        task_defaults = spec.get("TaskDefaults") or {}
        if "LogDriver" in task_defaults:
            self.log_driver = task_defaults["LogDriver"]

    def update_parameters(self, client: AnsibleDockerSwarmClient) -> None:
        """Build ``self.spec`` from the spec options that are set and supported.

        Options flagged as unsupported in ``client.option_minimal_versions``
        and options whose value is ``None`` are left out of the call to
        ``client.create_swarm_spec()``.
        """
        spec_options = (
            "snapshot_interval",
            "task_history_retention_limit",
            "keep_old_snapshots",
            "log_entries_for_slow_followers",
            "heartbeat_tick",
            "election_tick",
            "dispatcher_heartbeat_period",
            "node_cert_expiry",
            "name",
            "labels",
            "signing_ca_cert",
            "signing_ca_key",
            "ca_force_rotate",
            "autolock_managers",
            "log_driver",
        )
        params = {}
        for option in spec_options:
            if not client.option_minimal_versions[option]["supported"]:
                continue
            value = getattr(self, option)
            if value is not None:
                params[option] = value
        self.spec = client.create_swarm_spec(**params)

    def compare_to_active(
        self,
        other: TaskParameters,
        client: AnsibleDockerSwarmClient,
        differences: DifferenceTracker,
    ) -> DifferenceTracker:
        """Record in ``differences`` every option that deviates from ``other``.

        :param other: Parameters describing the currently active swarm.
        :param client: Client whose ``option_minimal_versions`` decides which
            options are supported and therefore compared.
        :param differences: Tracker that receives the differences.
        :returns: The ``differences`` object that was passed in.
        """
        # Options that are only used on init/join, are handled separately
        # below, or are derived values; they never take part in the comparison.
        not_compared = (
            "advertise_addr",
            "listen_addr",
            "remote_addrs",
            "join_token",
            "rotate_worker_token",
            "rotate_manager_token",
            "spec",
            "default_addr_pool",
            "subnet_size",
            "data_path_addr",
            "data_path_port",
        )
        for k in self.__dict__:
            if k in not_compared:
                continue
            if not client.option_minimal_versions[k]["supported"]:
                continue
            value = getattr(self, k)
            if value is None:
                continue
            other_value = getattr(other, k)
            if value != other_value:
                differences.add(k, parameter=value, active=other_value)

        # A requested token rotation is always reported as a difference.
        if self.rotate_worker_token:
            differences.add("rotate_worker_token", parameter=True, active=False)
        if self.rotate_manager_token:
            differences.add("rotate_manager_token", parameter=True, active=False)
        return differences


class SwarmManager(DockerBaseClass):
    """Executes the action selected by the ``state`` module parameter."""

    def __init__(
        self, client: AnsibleDockerSwarmClient, results: dict[str, t.Any]
    ) -> None:
        """Store the client and the shared ``results`` dict and read the module parameters."""
        super().__init__()

        self.client = client
        self.results = results
        self.check_mode = self.client.check_mode
        self.swarm_info: dict[str, t.Any] = {}

        self.state: t.Literal["present", "join", "absent", "remove"] = (
            client.module.params["state"]
        )
        self.force: bool = client.module.params["force"]
        self.node_id: str | None = client.module.params["node_id"]

        self.differences = DifferenceTracker()
        self.parameters = TaskParameters.from_ansible_params(client)

        # Set to True by init_swarm() once it has gone through the creation path.
        self.created = False

    def __call__(self) -> None:
        """Run the handler for ``self.state`` and attach diff output when requested."""
        choice_map = {
            "present": self.init_swarm,
            "join": self.join,
            "absent": self.leave,
            "remove": self.remove,
        }

        choice_map[self.state]()

        if self.client.module._diff or self.parameters.debug:
            diff = {}
            diff["before"], diff["after"] = self.differences.get_before_after()
            self.results["diff"] = diff

    def inspect_swarm(self) -> None:
        """Refresh ``self.swarm_info`` and ``results["swarm_facts"]`` from the daemon.

        An ``APIError`` is deliberately ignored; in that case the previously
        stored information is left untouched.
        """
        try:
            data = self.client.inspect_swarm()
            # Round trip through JSON; presumably to obtain a plain,
            # JSON-serialisable copy of the inspection result.
            json_str = json.dumps(data, ensure_ascii=False)
            self.swarm_info = json.loads(json_str)

            self.results["changed"] = False
            self.results["swarm_facts"] = self.swarm_info

            unlock_key = self.get_unlock_key()
            self.swarm_info.update(unlock_key)
        except APIError:
            pass

    def get_unlock_key(self) -> dict[str, t.Any]:
        """Return the unlock key dict, or ``{"UnlockKey": None}`` when not applicable.

        The key is only requested from the daemon when the lock state has
        changed (see :meth:`has_swarm_lock_changed`); an ``APIError`` or an
        empty answer also yields the default.
        """
        default = {"UnlockKey": None}
        if not self.has_swarm_lock_changed():
            return default
        try:
            return self.client.get_unlock_key() or default
        except APIError:
            return default

    def has_swarm_lock_changed(self) -> bool:
        """Tell whether autolock is requested and was newly enabled in this run."""
        return bool(self.parameters.autolock_managers) and (
            self.created or self.differences.has_difference_for("autolock_managers")
        )

    def init_swarm(self) -> None:
        """Create a new swarm, or update the existing one if this node already manages one.

        With ``force`` set, a new cluster is always initialised.
        """
        if not self.force and self.client.check_if_swarm_manager():
            self.__update_swarm()
            return

        if not self.check_mode:
            init_arguments: dict[str, t.Any] = {
                "advertise_addr": self.parameters.advertise_addr,
                "listen_addr": self.parameters.listen_addr,
                "force_new_cluster": self.force,
                "swarm_spec": self.parameters.spec,
            }
            # Optional arguments are only passed on when the user supplied them.
            if self.parameters.default_addr_pool is not None:
                init_arguments["default_addr_pool"] = self.parameters.default_addr_pool
            if self.parameters.subnet_size is not None:
                init_arguments["subnet_size"] = self.parameters.subnet_size
            if self.parameters.data_path_addr is not None:
                init_arguments["data_path_addr"] = self.parameters.data_path_addr
            if self.parameters.data_path_port is not None:
                init_arguments["data_path_port"] = self.parameters.data_path_port
            try:
                self.client.init_swarm(**init_arguments)
            except APIError as exc:
                self.client.fail(f"Can not create a new Swarm Cluster: {exc}")

        # In check mode nothing was created, so not being a manager is expected.
        if not self.client.check_if_swarm_manager():
            if not self.check_mode:
                self.client.fail("Swarm not created or other error!")

        self.created = True
        self.inspect_swarm()
        self.results["actions"].append(
            f"New Swarm cluster created: {self.swarm_info.get('ID')}"
        )
        self.differences.add("state", parameter="present", active="absent")
        self.results["changed"] = True
        self.results["swarm_facts"] = {
            "JoinTokens": self.swarm_info.get("JoinTokens"),
            "UnlockKey": self.swarm_info.get("UnlockKey"),
        }

    def __update_swarm(self) -> None:
        """Compare the requested options with the active swarm and update it if they differ."""
        try:
            self.inspect_swarm()
            version = self.swarm_info["Version"]["Index"]
            self.parameters.update_from_swarm_info(self.swarm_info)
            old_parameters = TaskParameters()
            old_parameters.update_from_swarm_info(self.swarm_info)
            self.parameters.compare_to_active(
                old_parameters, self.client, self.differences
            )
            if self.differences.empty:
                self.results["actions"].append("No modification")
                self.results["changed"] = False
                return
            # from_ansible_params() already builds the spec via
            # update_parameters(), so no second call is needed here.
            update_parameters = TaskParameters.from_ansible_params(self.client)
            if not self.check_mode:
                self.client.update_swarm(
                    version=version,
                    swarm_spec=update_parameters.spec,
                    rotate_worker_token=self.parameters.rotate_worker_token,
                    rotate_manager_token=self.parameters.rotate_manager_token,
                )
        except APIError as exc:
            self.client.fail(f"Can not update a Swarm Cluster: {exc}")

        self.inspect_swarm()
        self.results["actions"].append("Swarm cluster updated")
        self.results["changed"] = True

    def join(self) -> None:
        """Join an existing swarm unless this node is already part of one."""
        if self.client.check_if_swarm_node():
            self.results["actions"].append("This node is already part of a swarm.")
            return

        if not self.check_mode:
            join_arguments = {
                "remote_addrs": self.parameters.remote_addrs,
                "join_token": self.parameters.join_token,
                "listen_addr": self.parameters.listen_addr,
                "advertise_addr": self.parameters.advertise_addr,
            }
            if self.parameters.data_path_addr is not None:
                join_arguments["data_path_addr"] = self.parameters.data_path_addr
            try:
                self.client.join_swarm(**join_arguments)
            except APIError as exc:
                self.client.fail(f"Can not join the Swarm Cluster: {exc}")

        self.results["actions"].append("New node is added to swarm cluster")
        self.differences.add("joined", parameter=True, active=False)
        self.results["changed"] = True

    def leave(self) -> None:
        """Leave the swarm unless this node is not part of one."""
        if not self.client.check_if_swarm_node():
            self.results["actions"].append("This node is not part of a swarm.")
            return

        if not self.check_mode:
            try:
                self.client.leave_swarm(force=self.force)
            except APIError as exc:
                self.client.fail(f"This node can not leave the Swarm Cluster: {exc}")

        self.results["actions"].append("Node has left the swarm cluster")
        self.differences.add("joined", parameter="absent", active="present")
        self.results["changed"] = True

    def remove(self) -> None:
        """Remove the node ``self.node_id`` from the swarm; must run on a manager.

        The node is only removed when it is reported as down.
        """
        if not self.client.check_if_swarm_manager():
            self.client.fail("This node is not a manager.")

        try:
            status_down = self.client.check_if_swarm_node_is_down(
                node_id=self.node_id, repeat_check=5
            )
        except APIError:
            # NOTE(review): returns without reporting a change; presumably the
            # node is unknown to the swarm at this point - confirm.
            return

        if not status_down:
            self.client.fail(
                "Can not remove the node. The status node is ready and not down."
            )

        if not self.check_mode:
            try:
                self.client.remove_node(node_id=self.node_id, force=self.force)
            except APIError as exc:
                self.client.fail(
                    f"Can not remove the node from the Swarm Cluster: {exc}"
                )

        self.results["actions"].append("Node is removed from swarm cluster.")
        self.differences.add("joined", parameter=False, active=True)
        self.results["changed"] = True


def _detect_remove_operation(client: AnsibleDockerSwarmClient) -> bool:
    """Return ``True`` when the module was called with ``state=remove``."""
    return client.module.params["state"] == "remove"


def main() -> None:
    """Module entry point: build the client, run the requested action, exit with the results."""
    # TODO: missing option log_driver?
    argument_spec = {
        "advertise_addr": {"type": "str"},
        "data_path_addr": {"type": "str"},
        "data_path_port": {"type": "int"},
        "state": {
            "type": "str",
            "default": "present",
            "choices": ["present", "join", "absent", "remove"],
        },
        "force": {"type": "bool", "default": False},
        "listen_addr": {"type": "str", "default": "0.0.0.0:2377"},
        "remote_addrs": {"type": "list", "elements": "str"},
        "join_token": {"type": "str", "no_log": True},
        "snapshot_interval": {"type": "int"},
        "task_history_retention_limit": {"type": "int"},
        "keep_old_snapshots": {"type": "int"},
        "log_entries_for_slow_followers": {"type": "int"},
        "heartbeat_tick": {"type": "int"},
        "election_tick": {"type": "int"},
        "dispatcher_heartbeat_period": {"type": "int"},
        "node_cert_expiry": {"type": "int"},
        "name": {"type": "str"},
        "labels": {"type": "dict"},
        "signing_ca_cert": {"type": "str"},
        "signing_ca_key": {"type": "str", "no_log": True},
        "ca_force_rotate": {"type": "int"},
        "autolock_managers": {"type": "bool"},
        "node_id": {"type": "str"},
        "rotate_worker_token": {"type": "bool", "default": False},
        "rotate_manager_token": {"type": "bool", "default": False},
        "default_addr_pool": {"type": "list", "elements": "str"},
        "subnet_size": {"type": "int"},
    }

    required_if = [
        ("state", "join", ["remote_addrs", "join_token"]),
        ("state", "remove", ["node_id"]),
    ]

    option_minimal_versions = {
        "labels": {"docker_py_version": "2.6.0", "docker_api_version": "1.32"},
        "signing_ca_cert": {"docker_py_version": "2.6.0", "docker_api_version": "1.30"},
        "signing_ca_key": {"docker_py_version": "2.6.0", "docker_api_version": "1.30"},
        "ca_force_rotate": {"docker_py_version": "2.6.0", "docker_api_version": "1.30"},
        "autolock_managers": {"docker_py_version": "2.6.0"},
        "log_driver": {"docker_py_version": "2.6.0"},
        "remove_operation": {
            "docker_py_version": "2.4.0",
            "detect_usage": _detect_remove_operation,
            "usage_msg": "remove swarm nodes",
        },
        "default_addr_pool": {
            "docker_py_version": "4.0.0",
            "docker_api_version": "1.39",
        },
        "subnet_size": {"docker_py_version": "4.0.0", "docker_api_version": "1.39"},
        "data_path_addr": {"docker_py_version": "4.0.0", "docker_api_version": "1.30"},
        "data_path_port": {"docker_py_version": "6.0.0", "docker_api_version": "1.40"},
    }

    client = AnsibleDockerSwarmClient(
        argument_spec=argument_spec,
        supports_check_mode=True,
        required_if=required_if,
        min_docker_version="2.0.0",
        option_minimal_versions=option_minimal_versions,
    )
    sanitize_labels(client.module.params["labels"], "labels", client)

    try:
        results = {"changed": False, "result": "", "actions": []}

        SwarmManager(client, results)()
        client.module.exit_json(**results)
    except DockerException as e:
        client.fail(
            f"An unexpected Docker error occurred: {e}",
            exception=traceback.format_exc(),
        )
    except RequestException as e:
        client.fail(
            f"An unexpected requests error occurred when Docker SDK for Python tried to talk to the docker daemon: {e}",
            exception=traceback.format_exc(),
        )


if __name__ == "__main__":
    main()