From e8f53af92a54bdb39dd4aadbff0d196574043088 Mon Sep 17 00:00:00 2001 From: xin liang Date: Mon, 29 Jul 2024 14:20:13 +0800 Subject: [PATCH] ttt --- crmsh/bootstrap.py | 11 +- crmsh/ocfs2.py | 2 +- crmsh/qdevice.py | 6 +- crmsh/sbd.py | 586 ++++++++++++++++++++++++++++----------------- crmsh/ui_sbd.py | 127 ++++------ crmsh/utils.py | 2 +- 6 files changed, 426 insertions(+), 308 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 40d227a0dd..d99eb49a06 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -216,8 +216,11 @@ def _validate_sbd_option(self): """ Validate sbd options """ + from .sbd import SBDUtils if self.sbd_devices and self.diskless_sbd: utils.fatal("Can't use -s and -S options together") + if self.sbd_devices: + SBDUtils.verify_sbd_device(self.sbd_devices) if self.stage == "sbd": if not self.sbd_devices and not self.diskless_sbd and self.yes_to_all: utils.fatal("Stage sbd should specify sbd device by -s or diskless sbd by -S option") @@ -298,8 +301,8 @@ def validate_option(self): self._validate_sbd_option() def init_sbd_manager(self): - from .sbd import SBDManager - self.sbd_manager = SBDManager(self) + from .sbd import SBDManager2 + self.sbd_manager = SBDManager2(bootstrap_context=self) def detect_platform(self): """ @@ -501,7 +504,7 @@ def is_online(): return False # if peer_node is None, this is in the init process - if _context.cluster_node is None: + if not _context or _context.cluster_node is None: return True # In join process # If the joining node is already online but can't find the init node @@ -1397,7 +1400,7 @@ def init_sbd(): import crmsh.sbd if _context.stage == "sbd": crmsh.sbd.clean_up_existing_sbd_resource() - _context.sbd_manager.sbd_init() + _context.sbd_manager.init_and_deploy_sbd() def init_upgradeutil(): diff --git a/crmsh/ocfs2.py b/crmsh/ocfs2.py index 346cc5c20e..6b5414a42d 100644 --- a/crmsh/ocfs2.py +++ b/crmsh/ocfs2.py @@ -119,7 +119,7 @@ def _check_sbd_and_ocfs2_dev(self): """ from . import sbd if ServiceManager().service_is_enabled("sbd.service"): - sbd_device_list = sbd.SBDManager.get_sbd_device_from_config() + sbd_device_list = sbd.SBDUtils.get_sbd_device_from_config() for dev in self.ocfs2_devices: if dev in sbd_device_list: self._dynamic_raise_error("{} cannot be the same with SBD device".format(dev)) diff --git a/crmsh/qdevice.py b/crmsh/qdevice.py index 982d7a688f..27cedce45f 100644 --- a/crmsh/qdevice.py +++ b/crmsh/qdevice.py @@ -614,12 +614,12 @@ def adjust_sbd_watchdog_timeout_with_qdevice(self): """ Adjust SBD_WATCHDOG_TIMEOUT when configuring qdevice and diskless SBD """ - from .sbd import SBDManager, SBDTimeout + from .sbd import SBDManager, SBDTimeout, SBDUtils utils.check_all_nodes_reachable() - self.using_diskless_sbd = SBDManager.is_using_diskless_sbd() + self.using_diskless_sbd = SBDUtils.is_using_diskless_sbd() # add qdevice after diskless sbd started if self.using_diskless_sbd: - res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") + res = SBDUtils.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") if not res or int(res) < SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE: sbd_watchdog_timeout_qdevice = SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)}) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index cf25bc4547..291e0938de 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -8,6 +8,7 @@ from . import constants from . import corosync from . import xmlutil +from . import watchdog from .service_manager import ServiceManager from .sh import ShellUtils @@ -15,6 +16,120 @@ logger_utils = log.LoggerUtils(logger) +class SBDUtils: + """ + Consolidate sbd related utility methods + """ + @staticmethod + def get_sbd_device_metadata(dev, timeout_only=False, remote=None) -> dict: + """ + Extract metadata from sbd device header + """ + sbd_info = {} + try: + out = sh.cluster_shell().get_stdout_or_raise_error(f"sbd -d {dev} dump", remote) + except: + return sbd_info + + pattern = r"UUID\s+:\s+(\S+)|Timeout\s+\((\w+)\)\s+:\s+(\d+)" + matches = re.findall(pattern, out) + for uuid, timeout_type, timeout_value in matches: + if uuid and not timeout_only: + sbd_info["uuid"] = uuid + elif timeout_type and timeout_value: + sbd_info[timeout_type] = int(timeout_value) + return sbd_info + + @staticmethod + def get_device_uuid(dev, node=None): + """ + Get UUID for specific device and node + """ + res = SBDUtils.get_sbd_device_metadata(dev, node).get("uuid") + if not res: + raise ValueError(f"Cannot find sbd device UUID for {dev}") + return res + + @staticmethod + def compare_device_uuid(dev, node_list): + """ + Compare local sbd device UUID with other node's sbd device UUID + """ + if not node_list: + return + local_uuid = SBDUtils.get_device_uuid(dev) + for node in node_list: + remote_uuid = SBDUtils.get_device_uuid(dev, node) + if local_uuid != remote_uuid: + raise ValueError(f"Device {dev} doesn't have the same UUID with {node}") + + @staticmethod + def verify_sbd_device(dev_list, compare_node_list=[]): + if len(dev_list) > len(set(dev_list)): + raise ValueError("Duplicate device") + if len(dev_list) > SBDManager2.SBD_DEVICE_MAX: + raise ValueError(f"Exceed max device number: {SBDManager2.SBD_DEVICE_MAX}") + for dev in dev_list: + if not utils.is_block_device(dev): + raise ValueError(f"{dev} doesn't look like a block device") + SBDUtils.compare_device_uuid(dev, compare_node_list) + + @staticmethod + def get_sbd_value_from_config(key): + """ + Get value from /etc/sysconfig/sbd + """ + return utils.parse_sysconfig(SYSCONFIG_SBD).get(key) + + @staticmethod + def get_sbd_device_from_config(): + """ + Get sbd device list from config + """ + res = SBDUtils.get_sbd_value_from_config("SBD_DEVICE") + return res.split(';') if res else [] + + @staticmethod + def is_using_diskless_sbd(): + """ + Check if using diskless SBD + """ + dev_list = SBDUtils.get_sbd_device_from_config() + return not dev_list and ServiceManager().service_is_active("sbd.service") + + @staticmethod + def has_sbd_device_already_initialized(dev) -> bool: + """ + Check if sbd device already initialized + """ + cmd = "sbd -d {} dump".format(dev) + rc, _, _ = ShellUtils().get_stdout_stderr(cmd) + return rc == 0 + + @staticmethod + def no_overwrite_device_check(dev) -> bool: + """ + Check if device already initialized and ask if need to overwrite + """ + initialized = SBDUtils.has_sbd_device_already_initialized(dev) + confirmed = bootstrap.confirm("SBD is already configured to use {} - overwrite?".format(dev)) + return initialized and not confirmed + + @staticmethod + def have_consisitent_device_metadata(dev_list) -> bool: + """ + Check if all devices have the same metadata + """ + if len(dev_list) < 2: + return True + consistent = True + for dev in dev_list[1:]: + if SBDUtils.get_sbd_device_metadata(dev) != SBDUtils.get_sbd_device_metadata(dev_list[0]): + logger.warning(f"Device {dev} doesn't have the same metadata with {dev_list[0]}") + consistent = False + return consistent + + class SBDTimeout(object): """ Consolidate sbd related timeout methods and constants @@ -34,7 +149,6 @@ def __init__(self, context=None): self.stonith_timeout = None self.sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT self.stonith_watchdog_timeout = self.STONITH_WATCHDOG_TIMEOUT_DEFAULT - self.sbd_delay_start = None self.two_node_without_qdevice = False def initialize_timeout(self): @@ -70,14 +184,17 @@ def _set_sbd_msgwait(self): self.sbd_msgwait = sbd_msgwait @classmethod - def get_advised_sbd_timeout(cls) -> typing.Tuple[int, int]: + def get_advised_sbd_timeout(cls, diskless=False) -> typing.Tuple[int, int]: """ Get suitable sbd_watchdog_timeout and sbd_msgwait """ ctx = bootstrap.Context() + ctx.diskless_sbd = diskless ctx.load_profiles() time_inst = cls(ctx) time_inst.initialize_timeout() + if diskless: + return time_inst.stonith_watchdog_timeout, None return time_inst.sbd_watchdog_timeout, time_inst.sbd_msgwait def _adjust_sbd_watchdog_timeout_with_diskless_and_qdevice(self): @@ -97,31 +214,12 @@ def _adjust_sbd_watchdog_timeout_with_diskless_and_qdevice(self): logger.warning("sbd_watchdog_timeout is set to {} for qdevice, it was {}".format(self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE, self.sbd_watchdog_timeout)) self.sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE - @staticmethod - def get_sbd_device_metadata(dev, timeout_only=False) -> dict: - """ - Extract metadata from sbd device header - """ - sbd_info = {} - try: - out = sh.cluster_shell().get_stdout_or_raise_error("sbd -d {} dump".format(dev)) - except: - return sbd_info - pattern = r"UUID\s+:\s+(\S+)|Timeout\s+\((\w+)\)\s+:\s+(\d+)" - matches = re.findall(pattern, out) - for uuid, timeout_type, timeout_value in matches: - if uuid and not timeout_only: - sbd_info["uuid"] = uuid - elif timeout_type and timeout_value: - sbd_info[timeout_type] = int(timeout_value) - return sbd_info - @staticmethod def get_sbd_msgwait(dev): """ Get msgwait for sbd device """ - res = SBDTimeout.get_sbd_device_metadata(dev).get("msgwait") + res = SBDUtils.get_sbd_device_metadata(dev).get("msgwait") if not res: raise ValueError(f"Cannot get sbd msgwait for {dev}") return res @@ -131,7 +229,7 @@ def get_sbd_watchdog_timeout(): """ Get SBD_WATCHDOG_TIMEOUT from /etc/sysconfig/sbd """ - res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") + res = SBDUtils.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") if not res: raise ValueError("Cannot get the value of SBD_WATCHDOG_TIMEOUT") return int(res) @@ -153,7 +251,7 @@ def _load_configurations(self): """ self.two_node_without_qdevice = utils.is_2node_cluster_without_qdevice() - dev_list = SBDManager.get_sbd_device_from_config() + dev_list = SBDUtils.get_sbd_device_from_config() if dev_list: # disk-based self.disk_based = True self.msgwait = SBDTimeout.get_sbd_msgwait(dev_list[0]) @@ -163,7 +261,7 @@ def _load_configurations(self): self.sbd_watchdog_timeout = SBDTimeout.get_sbd_watchdog_timeout() self.stonith_watchdog_timeout = SBDTimeout.get_stonith_watchdog_timeout() self.sbd_delay_start_value_expected = self.get_sbd_delay_start_expected() if utils.detect_virt() else "no" - self.sbd_delay_start_value_from_config = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + self.sbd_delay_start_value_from_config = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") logger.debug("Inspect SBDTimeout: %s", vars(self)) @@ -211,7 +309,7 @@ def get_sbd_delay_start_sec_from_sysconfig(): Get suitable systemd start timeout for sbd.service """ # TODO 5ms, 5us, 5s, 5m, 5h are also valid for sbd sysconfig - value = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + value = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") if utils.is_boolean_true(value): return 2*SBDTimeout.get_sbd_watchdog_timeout() return int(value) @@ -221,7 +319,7 @@ def is_sbd_delay_start(): """ Check if SBD_DELAY_START is not no or not set """ - res = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + res = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") return res and res != "no" @staticmethod @@ -234,7 +332,7 @@ def adjust_systemd_start_timeout(self): """ Adjust start timeout for sbd when set SBD_DELAY_START """ - sbd_delay_start_value = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + sbd_delay_start_value = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") if sbd_delay_start_value == "no": return @@ -265,7 +363,7 @@ def adjust_sbd_delay_start(self): if expected_value == "no" \ or (not re.search(r'\d+', config_value)) \ or (int(expected_value) > int(config_value)): - SBDManager.update_configuration({"SBD_DELAY_START": expected_value}) + SBDManager2.update_configuration({"SBD_DELAY_START": expected_value}) @classmethod def adjust_sbd_timeout_related_cluster_configuration(cls): @@ -282,6 +380,241 @@ def adjust_sbd_timeout_related_cluster_configuration(cls): cls_inst.adjust_systemd_start_timeout() +class SBDManager2: + SBD_STATUS_DESCRIPTION = """Configure SBD: + If you have shared storage, for example a SAN or iSCSI target, + you can use it avoid split-brain scenarios by configuring SBD. + This requires a 1 MB partition, accessible to all nodes in the + cluster. The device path must be persistent and consistent + across all nodes in the cluster, so /dev/disk/by-id/* devices + are a good choice. Note that all data on the partition you + specify here will be destroyed. +""" + NO_SBD_WARNING = "Not configuring SBD - STONITH will be disabled." + DISKLESS_SBD_WARNING = "Diskless SBD requires cluster with three or more nodes. If you want to use diskless SBD for 2-node cluster, should be combined with QDevice." + SBD_RA = "stonith:fence_sbd" + SBD_RA_ID = "stonith-sbd" + SBD_DEVICE_MAX = 3 + + def __init__( + self, + device_list_to_init: typing.List[str] | None = None, + timeout_dict: typing.Dict[str, int] | None = None, + update_dict: typing.Dict[str, str] | None = None, + diskless_sbd: bool = False, + bootstrap_context: bootstrap.Context | None = None + ): + """ + Init function which can be called from crm sbd subcommand or bootstrap + """ + self.package_installed = utils.package_is_installed("sbd") + if not self.package_installed: + return + + self.device_list_to_init = device_list_to_init or [] + self.timeout_dict = timeout_dict or {} + self.update_dict = update_dict or {} + self.diskless_sbd = diskless_sbd + self.from_bootstrap = False + self.cluster_is_running = ServiceManager().service_is_active("pacemaker.service") + + # From bootstrap init or join process, override the values + if bootstrap_context: + self.from_bootstrap = True + self.device_list_to_init = bootstrap_context.sbd_devices + self.diskless_sbd = bootstrap_context.diskless_sbd + + timeout_inst = SBDTimeout(bootstrap_context) + timeout_inst.initialize_timeout() + self.timeout_dict["watchdog"] = timeout_inst.sbd_watchdog_timeout + if not self.diskless_sbd: + self.timeout_dict["msgwait"] = timeout_inst.sbd_msgwait + self.update_dict["SBD_WATCHDOG_TIMEOUT"] = str(timeout_inst.sbd_watchdog_timeout) + self.update_dict["SBD_WATCHDOG_DEV"] = watchdog.Watchdog.get_watchdog_device(bootstrap_context.watchdog) + + self.no_overwrite_map = {} + for dev in self.device_list_to_init: + self.no_overwrite_map[dev] = SBDUtils.no_overwrite_device_check(dev) + + @staticmethod + def convert_timeout_dict_to_opt_str(timeout_dict: typing.Dict[str, int]) -> str: + timeout_option_map = { + "watchdog": "-1", + "allocate": "-2", + "loop": "-3", + "msgwait": "-4" + } + return ' '.join([f"{timeout_option_map[k]} {v}" for k, v in timeout_dict.items() + if k in timeout_option_map]) + + def update_configuration(self) -> None: + """ + Update and sync sbd configuration + """ + if not self.update_dict: + return + for key, value in self.update_dict.items(): + logger.info("Update %s in %s: %s", key, SYSCONFIG_SBD, value) + utils.sysconfig_set(SYSCONFIG_SBD, **self.update_dict) + bootstrap.sync_file(SYSCONFIG_SBD) + logger.info("Already synced %s to all nodes", SYSCONFIG_SBD) + + @classmethod + def update_sbd_configuration(cls, update_dict: typing.Dict[str, str]) -> None: + inst = cls(update_dict=update_dict) + inst.update_configuration() + + def initialize_sbd(self): + if self.diskless_sbd: + logger.info("Configuring diskless SBD") + self._warn_diskless_sbd() + return + elif not all(self.no_overwrite_map.values()): + logger.info("Configuring disk-based SBD") + + opt_str = SBDManager2.convert_timeout_dict_to_opt_str(self.timeout_dict) + shell = sh.cluster_shell() + for dev in self.device_list_to_init: + # skip if device already initialized and not overwrite + if dev in self.no_overwrite_map and self.no_overwrite_map[dev]: + continue + logger.info("Initializing SBD device %s", dev) + cmd = f"sbd {opt_str} -d {dev} create" + logger.debug("Running command: %s", cmd) + shell.get_stdout_or_raise_error(cmd) + + @staticmethod + def enable_sbd_service(): + cluster_nodes = utils.list_cluster_nodes() or [utils.this_node()] + service_manager = ServiceManager() + + for node in cluster_nodes: + if not service_manager.service_is_enabled("sbd.service", node): + logger.info("Enable sbd.service on node %s", node) + service_manager.enable_service("sbd.service", node) + + @staticmethod + def restart_cluster_if_possible(): + if not ServiceManager().service_is_active("paceamaker.service"): + return + if xmlutil.CrmMonXmlParser().is_any_resource_running(): + logger.warning("Resource is running, need to restart cluster service manually on each node") + else: + logger.info("Restarting cluster service") + utils.cluster_run_cmd("crm cluster restart") + bootstrap.wait_for_cluster() + + def configure_sbd_resource_and_properties(self): + """ + Configure stonith-sbd resource and related properties + """ + if not ServiceManager().service_is_active("paceamaker.service"): + return + if self.diskless_sbd: + utils.set_property("stonith-watchdog-timeout", SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT) + utils.set_property("stonith-timeout", constants.STONITH_TIMEOUT_DEFAULT) + else: + all_device_list = SBDUtils.get_sbd_device_from_config() + devices_param_str = f"params devices=\"{','.join(all_device_list)}\"" + cmd = f"crm configure primitive {self.SBD_RA_ID} {self.SBD_RA} {devices_param_str}" + sh.cluster_shell().get_stdout_or_raise_error(cmd) + utils.set_property("stonith-enabled", "true") + + bootstrap.adjust_properties() + + def _warn_diskless_sbd(self, peer=None): + """ + Give warning when configuring diskless sbd + """ + # When in sbd stage or join process + if (self.diskless_sbd and self.cluster_is_running) or peer: + vote_dict = utils.get_quorum_votes_dict(peer) + expected_vote = int(vote_dict.get('Expected', 0)) + if (expected_vote < 2 and peer) or (expected_vote < 3 and not peer): + logger.warning(self.DISKLESS_SBD_WARNING) + # When in init process + elif self.diskless_sbd: + logger.warning(self.DISKLESS_SBD_WARNING) + + def get_sbd_device_interactive(self): + """ + """ + if self.bootstap_context.yes_to_all: + logger.warning(self.NO_SBD_WARNING) + return + logger.info(self.SBD_STATUS_DESCRIPTION) + if not bootstrap.confirm("Do you wish to use SBD?"): + logger.warning(self.NO_SBD_WARNING) + return + + def get_sbd_device_from_bootstrap(self): + """ + Handle sbd device input from 'crm cluster init' with -s or -S option + -s is for disk-based sbd + -S is for diskless sbd + """ + dev_list = [] + # specified sbd device with -s option + if self.device_list_to_init: + dev_list = self.device_list_to_init + SBDUtils.verify_sbd_device(dev_list) + # no -s and no -S option + elif not self.diskless_sbd: + dev_list = self.get_sbd_device_interactive() + self.device_list_to_init = dev_list + + def init_and_deploy_sbd(self): + """ + The process of deploying sbd includes: + 1. Initialize sbd device + 2. Write config file /etc/sysconfig/sbd + 3. Enable sbd.service + 4. Restart cluster service if possible + 5. Configure stonith-sbd resource and related properties + """ + if not self.package_installed: + return + + if self.from_bootstrap: + self.get_sbd_device_from_bootstrap() + if not self.device_list_to_init and not self.diskless_sbd: + ServiceManager().disable_service("sbd.service") + return + + self.initialize_sbd() + self.update_configuration() + SBDManager2.enable_sbd_service() + SBDManager2.restart_cluster_if_possible() + self.configure_sbd_resource_and_properties() + + def join_sbd(self, remote_user, peer_host): + """ + Function join_sbd running on join process only + On joining process, check whether peer node has enabled sbd.service + If so, check prerequisites of SBD and verify sbd device on join node + """ + if not self.package_installed: + return + + service_manager = ServiceManager() + if not os.path.exists(SYSCONFIG_SBD) or not service_manager.service_is_enabled("sbd.service", peer_host): + service_manager.disable_service("sbd.service") + return + + from .watchdog import Watchdog + self._watchdog_inst = Watchdog(remote_user=remote_user, peer_host=peer_host) + self._watchdog_inst.join_watchdog() + + dev_list = SBDUtils.get_sbd_device_from_config() + if dev_list: + SBDUtils.verify_sbd_device(dev_list, [peer_host]) + else: + self._warn_diskless_sbd(peer_host) + + logger.info("Got {}SBD configuration".format("" if dev_list else "diskless ")) + service_manager.enable_service("sbd.service") + + class SBDManager(object): """ Class to manage sbd configuration and services @@ -321,39 +654,6 @@ def __init__(self, context): self.no_overwrite_map = {} self.no_update_config = False - @staticmethod - def _get_device_uuid(dev, node=None): - """ - Get UUID for specific device and node - """ - res = SBDTimeout.get_sbd_device_metadata(dev).get("uuid") - if not res: - raise ValueError("Cannot find sbd device UUID for {}".format(dev)) - return res - - def _compare_device_uuid(self, dev, node_list): - """ - Compare local sbd device UUID with other node's sbd device UUID - """ - if not node_list: - return - local_uuid = self._get_device_uuid(dev) - for node in node_list: - remote_uuid = self._get_device_uuid(dev, node) - if local_uuid != remote_uuid: - raise ValueError("Device {} doesn't have the same UUID with {}".format(dev, node)) - - def _verify_sbd_device(self, dev_list, compare_node_list=[]): - """ - Verify sbd device - """ - if len(dev_list) > self.SBD_DEVICE_MAX: - raise ValueError(f"Maximum number of SBD device is {self.SBD_DEVICE_MAX}") - for dev in dev_list: - if not utils.is_block_device(dev): - raise ValueError("{} doesn't look like a block device".format(dev)) - self._compare_device_uuid(dev, compare_node_list) - def _no_overwrite_check(self, dev): """ Check if device already initialized and if need to overwrite @@ -428,40 +728,6 @@ def _get_sbd_device(self): dev_list = self._get_sbd_device_interactive() self._sbd_devices = dev_list - def _initialize_sbd(self): - """ - Initialize SBD parameters according to profiles.yml, or the crmsh defined defaulst as the last resort. - This covers both disk-based-sbd, and diskless-sbd scenarios. - For diskless-sbd, set sbd_watchdog_timeout then return; - For disk-based-sbd, also calculate the msgwait value, then initialize the SBD device. - """ - if self.diskless_sbd: - logger.info("Configuring diskless SBD") - self.timeout_inst = SBDTimeout(self._context) - self.timeout_inst.initialize_timeout() - if self.diskless_sbd: - return - - opt_str = "-4 {} -1 {}".format(self.timeout_inst.sbd_msgwait, self.timeout_inst.sbd_watchdog_timeout) - device_list = [ - dev for dev in self._sbd_devices - if dev not in self.no_overwrite_map - or not self.no_overwrite_map[dev] - ] - SBDManager.initialize_sbd_device(device_list, opt_str) - - @staticmethod - def initialize_sbd_device(device_list: typing.List[str], opt_str: str) -> None: - """ - Initialize sbd device with options - """ - shell = sh.cluster_shell() - for dev in device_list: - logger.info("Initializing SBD device %s", dev) - cmd = f"sbd {opt_str} -d {dev} create" - logger.debug("Running command: %s", cmd) - shell.get_stdout_or_raise_error(cmd) - def _update_sbd_configuration(self): """ Update /etc/sysconfig/sbd @@ -479,16 +745,6 @@ def _update_sbd_configuration(self): sbd_config_dict["SBD_DEVICE"] = ';'.join(self._sbd_devices) SBDManager.update_configuration(sbd_config_dict) - def _get_sbd_device_from_config(self): - """ - Gets currently configured SBD device, i.e. what's in /etc/sysconfig/sbd - """ - res = SBDManager.get_sbd_value_from_config("SBD_DEVICE") - if res: - return utils.re_split_string(self.PARSE_RE, res) - else: - return [] - def _restart_cluster_and_configure_sbd_ra(self): """ Try to configure sbd resource, restart cluster on needed @@ -518,42 +774,6 @@ def _enable_sbd_service(self): # in init process bootstrap.invoke("systemctl enable sbd.service") - def _warn_diskless_sbd(self, peer=None): - """ - Give warning when configuring diskless sbd - """ - # When in sbd stage or join process - if (self.diskless_sbd and self._context.cluster_is_running) or peer: - vote_dict = utils.get_quorum_votes_dict(peer) - expected_vote = int(vote_dict['Expected']) - if (expected_vote < 2 and peer) or (expected_vote < 3 and not peer): - logger.warning(self.DISKLESS_SBD_WARNING) - # When in init process - elif self.diskless_sbd: - logger.warning(self.DISKLESS_SBD_WARNING) - - def sbd_init(self): - """ - Function sbd_init includes these steps: - 1. Get sbd device from options or interactive mode - 2. Initialize sbd device - 3. Write config file /etc/sysconfig/sbd - """ - from .watchdog import Watchdog - - if not utils.package_is_installed("sbd"): - return - self._watchdog_inst = Watchdog(_input=self._context.watchdog) - self._watchdog_inst.init_watchdog() - self._get_sbd_device() - if not self._sbd_devices and not self.diskless_sbd: - bootstrap.invoke("systemctl disable sbd.service") - return - self._warn_diskless_sbd() - self._initialize_sbd() - self._update_sbd_configuration() - self._enable_sbd_service() - def configure_sbd_resource_and_properties(self): """ Configure stonith-sbd resource and related properties @@ -582,88 +802,6 @@ def configure_sbd_resource_and_properties(self): if self._context.cluster_is_running: bootstrap.adjust_properties() - def join_sbd(self, remote_user, peer_host): - """ - Function join_sbd running on join process only - On joining process, check whether peer node has enabled sbd.service - If so, check prerequisites of SBD and verify sbd device on join node - """ - from .watchdog import Watchdog - - if not utils.package_is_installed("sbd"): - return - if not os.path.exists(SYSCONFIG_SBD) or not ServiceManager().service_is_enabled("sbd.service", peer_host): - bootstrap.invoke("systemctl disable sbd.service") - return - self._watchdog_inst = Watchdog(remote_user=remote_user, peer_host=peer_host) - self._watchdog_inst.join_watchdog() - dev_list = self._get_sbd_device_from_config() - if dev_list: - self._verify_sbd_device(dev_list, [peer_host]) - else: - self._warn_diskless_sbd(peer_host) - logger.info("Got {}SBD configuration".format("" if dev_list else "diskless ")) - bootstrap.invoke("systemctl enable sbd.service") - - @classmethod - def verify_sbd_device(cls, device_list=[], compare_node_list=[]): - """ - This classmethod is for verifying sbd device on a running cluster - Raise ValueError for exceptions - """ - inst = cls(bootstrap.Context()) - dev_list = device_list or inst._get_sbd_device_from_config() - if not dev_list: - raise ValueError("No sbd device configured") - inst._verify_sbd_device(dev_list, compare_node_list) - - @classmethod - def get_sbd_device_from_config(cls): - """ - Get sbd device list from config - """ - inst = cls(bootstrap.Context()) - return inst._get_sbd_device_from_config() - - @classmethod - def is_using_diskless_sbd(cls): - """ - Check if using diskless SBD - """ - inst = cls(bootstrap.Context()) - dev_list = inst._get_sbd_device_from_config() - if not dev_list and ServiceManager().service_is_active("sbd.service"): - return True - return False - - @staticmethod - def update_configuration(sbd_config_dict: typing.Dict[str, str]) -> None: - """ - Update and sync sbd configuration - """ - for key, value in sbd_config_dict.items(): - logger.info("Update %s in %s: %s", key, SYSCONFIG_SBD, value) - utils.sysconfig_set(SYSCONFIG_SBD, **sbd_config_dict) - bootstrap.sync_file(SYSCONFIG_SBD) - - @staticmethod - def get_sbd_value_from_config(key): - """ - Get value from /etc/sysconfig/sbd - """ - conf = utils.parse_sysconfig(SYSCONFIG_SBD) - res = conf.get(key) - return res - - @staticmethod - def has_sbd_device_already_initialized(dev): - """ - Check if sbd device already initialized - """ - cmd = "sbd -d {} dump".format(dev) - rc, _, _ = ShellUtils().get_stdout_stderr(cmd) - return rc == 0 - def clean_up_existing_sbd_resource(): if xmlutil.CrmMonXmlParser().is_resource_configured(SBDManager.SBD_RA): diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 34107cb0fa..d2ef327dca 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -23,7 +23,7 @@ def sbd_devices_completer(completed_list: typing.List[str]) -> typing.List[str]: ''' if not ServiceManager().service_is_active("sbd.service"): return [] - dev_list = sbd.SBDManager.get_sbd_device_from_config() + dev_list = sbd.SBDUtils.get_sbd_device_from_config() if dev_list: return [dev for dev in dev_list if dev not in completed_list] return [] @@ -37,7 +37,7 @@ def sbd_configure_completer(completed_list: typing.List[str]) -> typing.List[str if not service_manager.service_is_active("pacemaker.service"): return [] sbd_service_is_enabled = service_manager.service_is_enabled("sbd.service") - dev_list = sbd.SBDManager.get_sbd_device_from_config() + dev_list = sbd.SBDUtils.get_sbd_device_from_config() # Show disk-based sbd configure options # if there are devices in config or sbd.service is not enabled is_diskbased = bool(dev_list) or not sbd_service_is_enabled @@ -110,10 +110,10 @@ class SyntaxError(Exception): def __init__(self): command.UI.__init__(self) - self.device_list_from_config = sbd.SBDManager.get_sbd_device_from_config() + self.device_list_from_config = sbd.SBDUtils.get_sbd_device_from_config() self.device_meta_dict_runtime = {} if self.device_list_from_config: - self.device_meta_dict_runtime = sbd.SBDTimeout.get_sbd_device_metadata(self.device_list_from_config[0], timeout_only=True) + self.device_meta_dict_runtime = sbd.SBDUtils.get_sbd_device_metadata(self.device_list_from_config[0], timeout_only=True) else: try: self.watchdog_timeout_from_config = sbd.SBDTimeout.get_sbd_watchdog_timeout() @@ -235,121 +235,98 @@ def _parse_args(self, args: typing.List[str]) -> dict[str, int|str|list[str]]: else: raise self.SyntaxError(f"Unknown argument: {arg}") - # disk-based sbd case, need to verify device list - if "device-list" in parameter_dict: - device_list = parameter_dict["device-list"] - if device_list: - if len(device_list) > len(set(device_list)): - raise self.SyntaxError("Duplicate device") - sbd.SBDManager.verify_sbd_device(list(set(device_list)-set(self.device_list_from_config))) - if len(set(device_list)|set(self.device_list_from_config)) > sbd.SBDManager.SBD_DEVICE_MAX: - raise self.SyntaxError(f"Exceed max device number: {sbd.SBDManager.SBD_DEVICE_MAX}") - # no device specified and no device in sysconfig - elif not self.device_list_from_config: - raise self.SyntaxError("No device specified") - watchdog_device = parameter_dict.get("watchdog-device") parameter_dict["watchdog-device"] = watchdog.Watchdog.get_watchdog_device(watchdog_device) logger.debug("Parsed arguments: %s", parameter_dict) return parameter_dict - def _has_specified_timeout(self, timeout_dict: dict) -> bool: - return timeout_dict and timeout_dict != self.device_meta_dict_runtime - @staticmethod - def _check_and_adjust_timeout(timeout_dict: typing.Dict[str, int]) -> typing.Dict[str, int]: + def _adjust_timeout_dict(timeout_dict: dict, diskless: bool = False) -> dict: watchdog_timeout = timeout_dict.get("watchdog") if not watchdog_timeout: - watchdog_timeout, _ = sbd.SBDTimeout.get_advised_sbd_timeout() - logger.info("No watchdog-timeout specified, use advised value: %d", watchdog_timeout) + watchdog_timeout, _ = sbd.SBDTimeout.get_advised_sbd_timeout(diskless) + logger.info("No watchdog timeout specified, use advised value: %s", watchdog_timeout) timeout_dict["watchdog"] = watchdog_timeout + if diskless: + return timeout_dict + msgwait_timeout = timeout_dict.get("msgwait") if not msgwait_timeout: msgwait_timeout = 2*watchdog_timeout - logger.info("No msgwait-timeout specified, use 2*watchdog-timeout: %d", msgwait_timeout) + logger.info("No msgwait timeout specified, use 2*watchdog timeout: %s", msgwait_timeout) timeout_dict["msgwait"] = msgwait_timeout if msgwait_timeout < 2*watchdog_timeout: - logger.warning("It's recommended that msgwait-timeout(now:%d) should be at least 2*watchdog-timeout(now:%d)", - msgwait_timeout, watchdog_timeout) + logger.warning("It's recommended to set msgwait timeout >= 2*watchdog timeout") return timeout_dict def _configure_diskbase(self, parameter_dict: dict): ''' + Configure disk-based SBD based on input parameters and runtime config ''' + update_dict = {} device_list = parameter_dict.get("device-list", []) - all_device_list = list(dict.fromkeys(self.device_list_from_config + device_list)) - new_device_list = list(set(device_list) - set(self.device_list_from_config)) + if not device_list and not self.device_list_from_config: + raise self.SyntaxError("No device specified") + watchdog_device = parameter_dict.get("watchdog-device") + if watchdog_device != self.watchdog_device_from_config: + update_dict["SBD_WATCHDOG_DEV"] = watchdog_device timeout_dict = {k: v for k, v in parameter_dict.items() if k in self.TIMEOUT_TYPES} + all_device_list = list( + dict.fromkeys(self.device_list_from_config + device_list) + ) + sbd.SBDUtils.verify_sbd_device(all_device_list) + new_device_list = list( + set(device_list) - set(self.device_list_from_config) + ) + if new_device_list: + update_dict["SBD_DEVICE"] = ";".join(all_device_list) + device_list_to_init = [] - # initialize new devices if timeout parameters are not specified - # or it is a subset of runtime metadata + # initialize new devices only if no timeout parameter specified or timeout parameter is already in runtime config if not timeout_dict or utils.is_subdict(timeout_dict, self.device_meta_dict_runtime): device_list_to_init = new_device_list - # else initialize all devices + # initialize all devices else: device_list_to_init = all_device_list - # merge runtime metadata with new timeout parameters - timeout_dict = self.device_meta_dict_runtime | timeout_dict - timeout_dict = SBD._check_and_adjust_timeout(timeout_dict) - timeout_opt_str = SBD._convert_meta_dict_to_str(timeout_dict) - sbd.SBDManager.initialize_sbd_device(device_list=device_list_to_init, opt_str=timeout_opt_str) - - update_dict = {} - if new_device_list: - update_dict = {"SBD_DEVICE": ";".join(all_device_list)} - watchdog_device = parameter_dict.get("watchdog-device") - if watchdog_device and watchdog_device != self.watchdog_device_from_config: - update_dict["SBD_WATCHDOG_DEV"] = watchdog_device - watchdog_timeout = parameter_dict.get("watchdog") - if watchdog_timeout and watchdog_timeout != self.device_meta_dict_runtime.get("watchdog"): + # merge runtime timeout dict with new timeout dict + timeout_dict = self.device_meta_dict_runtime | timeout_dict + # adjust watchdog and msgwait timeout + timeout_dict = self._adjust_timeout_dict(timeout_dict) + watchdog_timeout = timeout_dict.get("watchdog") + if watchdog_timeout != self.watchdog_timeout_from_config: update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) - if update_dict: - sbd.SBDManager.update_configuration(update_dict) - - msgwait_timeout = parameter_dict.get("msgwait") - if msgwait_timeout and msgwait_timeout != self.device_meta_dict_runtime.get("msgwait"): - sbd.SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() - - sbd.enable_sbd_on_cluster() - if update_dict: - logger.info(self.SYNCED_INFO) - logger.info(self.RESTART_INFO) + sbd_manager = sbd.SBDManager2( + device_list_to_init=device_list_to_init, + timeout_dict=timeout_dict, + update_dict=update_dict + ) + sbd_manager.init_and_deploy_sbd() + def _configure_diskless(self, parameter_dict: dict): ''' + Configure diskless SBD based on input parameters and runtime config ''' update_dict = {} + parameter_dict = self._adjust_timeout_dict(parameter_dict, diskless=True) watchdog_timeout = parameter_dict.get("watchdog") if watchdog_timeout and watchdog_timeout != self.watchdog_timeout_from_config: update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) watchdog_device = parameter_dict.get("watchdog-device") - if watchdog_device and watchdog_device != self.watchdog_device_from_config: + if watchdog_device != self.watchdog_device_from_config: update_dict["SBD_WATCHDOG_DEV"] = watchdog_device - if update_dict: - sbd.SBDManager.update_configuration(update_dict) - if watchdog_timeout and watchdog_timeout != self.watchdog_timeout_from_config: - sbd.SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() - if update_dict: - logger.info(self.SYNCED_INFO) - - @staticmethod - def _convert_meta_dict_to_str(meta_dict: dict) -> str: - timeout_option_mapping = { - "watchdog": "-1", - "allocate": "-2", - "loop": "-3", - "msgwait": "-4" - } - timeout_opt_list = [f"{timeout_option_mapping[k]} {v}" for k, v in meta_dict.items() - if k in timeout_option_mapping] - return ' '.join(timeout_opt_list) + sbd_manager = sbd.SBDManager2( + update_dict=update_dict, + diskless_sbd=True + ) + sbd_manager.init_and_deploy_sbd() @command.completers_repeating(sbd_configure_completer) def do_configure(self, context, *args) -> bool: diff --git a/crmsh/utils.py b/crmsh/utils.py index 666cb8c740..d3f3027c6f 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -2521,7 +2521,7 @@ def has_stonith_running(): from . import sbd out = sh.cluster_shell().get_stdout_or_raise_error("stonith_admin -L") has_stonith_device = re.search("[1-9]+ fence device[s]* found", out) is not None - using_diskless_sbd = sbd.SBDManager.is_using_diskless_sbd() + using_diskless_sbd = sbd.SBDUtils.is_using_diskless_sbd() return has_stonith_device or using_diskless_sbd