Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'crm sbd' sub-level (jsc#PED-8256) #1491

Open
wants to merge 29 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1a3ce1b
Dev: utils: Introduced `detect_duplicate_device_path` function in utils
liangxin1300 Oct 11, 2024
4856ee7
Dev: ui_sbd: Add new 'crm sbd' sublevel (jsc#PED-8256)
liangxin1300 Jun 14, 2024
1f56ed8
Dev: behave: Adjust functional test for previous changes
liangxin1300 Aug 13, 2024
c5adeed
Dev: doc: Add help info for crm sbd sublevel
liangxin1300 Aug 19, 2024
4c93b3d
Dev: ui_sbd: Add property/sysconfig section header for sbd configure …
liangxin1300 Sep 11, 2024
dc76bda
Dev: ui_sbd: No need to consider static case when calling crm configu…
liangxin1300 Sep 11, 2024
4681854
Dev: ui_sbd: Catch both stderr and stdout for crm resource status
liangxin1300 Sep 11, 2024
b9242cc
Dev: ui_sbd: Update regex for parsing SBD device by partlabel
liangxin1300 Sep 11, 2024
7fd5d96
Dev: ui_sbd: Clean up existing fence_sbd resource before configure di…
liangxin1300 Sep 11, 2024
b3383a9
Dev: ui_sbd: Minor changes to the code
liangxin1300 Sep 11, 2024
3294086
Dev: bootstrap: Check if sbd package is installed in the right place
liangxin1300 Sep 13, 2024
588022b
Dev: ui_sbd: Refactor do_status method
liangxin1300 Sep 13, 2024
96d9d9a
Dev: Refactor the code to avoid circular import
liangxin1300 Sep 19, 2024
35685f2
Dev: report: Dump output of 'crm sbd configure show' and 'crm sbd sta…
liangxin1300 Sep 20, 2024
6b5d7eb
Dev: ui_sbd: No need to specify device="" when trying to modify prope…
liangxin1300 Sep 20, 2024
84a2db2
Dev: ui_sbd: Add sbd device sub command
liangxin1300 Sep 30, 2024
b8395cb
Dev: ui_sbd: Replace sbd remove as sbd disable sub-command
liangxin1300 Oct 14, 2024
a6b1307
Dev: ui_sbd: Adjust sbd configure interface
liangxin1300 Oct 15, 2024
9c0e728
Dev: ui_sbd: Check if the adding device is already initialized
liangxin1300 Oct 17, 2024
e875426
Dev: bootstrap: Add a log info when starting pacemaker.service
liangxin1300 Oct 23, 2024
d3338f3
Dev: ui_sbd: Reuse sbd.SBDManager.restart_cluster_if_possible
liangxin1300 Oct 29, 2024
3c6061c
Dev: ui_sbd: Check if node is reachable when getting the node list
liangxin1300 Oct 29, 2024
2ca9e58
Dev: sbd: Move constants.SHOW_SBD_START_TIMEOUT_CMD to sbd.py
liangxin1300 Oct 29, 2024
b544027
Dev: sh: Add get_rc_output_without_input in ClusterShell
liangxin1300 Oct 29, 2024
d46228d
Dev: ui_sbd: Replace 'sbd disable' as 'sbd purge'
liangxin1300 Nov 13, 2024
6f3f767
Dev: doc: Update crm.8.adoc for SBD help text
liangxin1300 Sep 11, 2024
cfbf1dc
Dev: behave: Add sbd_ui.feature to test the crm sbd UI
liangxin1300 Jul 16, 2024
062a469
Dev: sbd: Split get_sbd_device_interactive into smaller functions
liangxin1300 Nov 15, 2024
79535f6
Dev: unittests: Adjust unit test for previous commits
liangxin1300 Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 35 additions & 23 deletions crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
from .sh import ShellUtils
from .ui_node import NodeMgmt
from .user_of_host import UserOfHost, UserNotFoundError
from .sbd import SBDUtils, SBDManager, SBDTimeout
from . import watchdog
import crmsh.healthcheck


Expand All @@ -55,21 +57,18 @@
COROSYNC_AUTH = "/etc/corosync/authkey"
CRM_CFG = "/etc/crm/crm.conf"
PROFILES_FILE = "/etc/crm/profiles.yml"
SYSCONFIG_SBD = "/etc/sysconfig/sbd"
SYSCONFIG_PCMK = "/etc/sysconfig/pacemaker"
SYSCONFIG_NFS = "/etc/sysconfig/nfs"
PCMK_REMOTE_AUTH = "/etc/pacemaker/authkey"
COROSYNC_CONF_ORIG = tmpfiles.create()[1]
SERVICES_STOP_LIST = ["corosync-qdevice.service", "corosync.service", "hawk.service", CSYNC2_SERVICE]
WATCHDOG_CFG = "/etc/modules-load.d/watchdog.conf"
BOOTH_DIR = "/etc/booth"
BOOTH_CFG = "/etc/booth/booth.conf"
BOOTH_AUTH = "/etc/booth/authkey"
SBD_SYSTEMD_DELAY_START_DIR = "/etc/systemd/system/sbd.service.d"
FILES_TO_SYNC = (BOOTH_DIR, corosync.conf(), COROSYNC_AUTH, CSYNC2_CFG, CSYNC2_KEY, "/etc/ctdb/nodes",
"/etc/drbd.conf", "/etc/drbd.d", "/etc/ha.d/ldirectord.cf", "/etc/lvm/lvm.conf", "/etc/multipath.conf",
"/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SYSCONFIG_SBD, PCMK_REMOTE_AUTH, WATCHDOG_CFG,
PROFILES_FILE, CRM_CFG, SBD_SYSTEMD_DELAY_START_DIR)
"/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SBDManager.SYSCONFIG_SBD, PCMK_REMOTE_AUTH, watchdog.Watchdog.WATCHDOG_CFG,
PROFILES_FILE, CRM_CFG, SBDManager.SBD_SYSTEMD_DELAY_START_DIR)

INIT_STAGES_EXTERNAL = ("ssh", "csync2", "corosync", "sbd", "cluster", "admin", "qdevice")
INIT_STAGES_INTERNAL = ("csync2_remote", "qnetd_remote")
Expand Down Expand Up @@ -132,7 +131,7 @@
self.profiles_dict = {}
self.default_nic = None
self.default_ip_list = []
self.rm_list = [SYSCONFIG_SBD, CSYNC2_CFG, corosync.conf(), CSYNC2_KEY,
self.rm_list = [SBDManager.SYSCONFIG_SBD, CSYNC2_CFG, corosync.conf(), CSYNC2_KEY,
COROSYNC_AUTH, "/var/lib/heartbeat/crm/*", "/var/lib/pacemaker/cib/*",
"/var/lib/corosync/*", "/var/lib/pacemaker/pengine/*", PCMK_REMOTE_AUTH,
"/var/lib/csync2/*", "~/.config/crm/*"]
Expand Down Expand Up @@ -211,12 +210,21 @@
"""
Validate sbd options
"""
with_sbd_option = self.sbd_devices or self.diskless_sbd
sbd_installed = utils.package_is_installed("sbd")

if with_sbd_option and not sbd_installed:
utils.fatal(SBDManager.SBD_NOT_INSTALLED_MSG)

Check warning on line 217 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L217

Added line #L217 was not covered by tests
if self.sbd_devices and self.diskless_sbd:
utils.fatal("Can't use -s and -S options together")
if self.sbd_devices:
SBDUtils.verify_sbd_device(self.sbd_devices)

Check warning on line 221 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L221

Added line #L221 was not covered by tests
if self.stage == "sbd":
if not self.sbd_devices and not self.diskless_sbd and self.yes_to_all:
if not sbd_installed:
utils.fatal(SBDManager.SBD_NOT_INSTALLED_MSG)

Check warning on line 224 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L224

Added line #L224 was not covered by tests
if not with_sbd_option and self.yes_to_all:
utils.fatal("Stage sbd should specify sbd device by -s or diskless sbd by -S option")
if ServiceManager().service_is_active("sbd.service") and not config.core.force:
if ServiceManager().service_is_active(constants.SBD_SERVICE) and not config.core.force:
utils.fatal("Can't configure stage sbd: sbd.service already running! Please use crm option '-F' if need to redeploy")
if self.cluster_is_running:
utils.check_all_nodes_reachable()
Expand Down Expand Up @@ -291,8 +299,7 @@
self._validate_sbd_option()

def init_sbd_manager(self):
from .sbd import SBDManager
self.sbd_manager = SBDManager(self)
self.sbd_manager = SBDManager(bootstrap_context=self)

def detect_platform(self):
"""
Expand Down Expand Up @@ -394,7 +401,7 @@


def confirm(msg):
if _context.yes_to_all:
if config.core.force or (_context and _context.yes_to_all):
return True
disable_completion()
rc = logger_utils.confirm(msg)
Expand All @@ -404,12 +411,12 @@


def disable_completion():
if _context.ui_context:
if _context and _context.ui_context:

Check warning on line 414 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L414

Added line #L414 was not covered by tests
_context.ui_context.disable_completion()


def enable_completion():
if _context.ui_context:
if _context and _context.ui_context:

Check warning on line 419 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L419

Added line #L419 was not covered by tests
_context.ui_context.setup_readline()


Expand Down Expand Up @@ -763,11 +770,10 @@

Return success node list
"""
from .sbd import SBDTimeout
# not _context means not in init or join process
if not _context and \
utils.package_is_installed("sbd") and \
ServiceManager().service_is_enabled("sbd.service") and \
ServiceManager().service_is_enabled(constants.SBD_SERVICE) and \
SBDTimeout.is_sbd_delay_start():
target_dir = "/run/systemd/system/sbd.service.d/"
cmd1 = "mkdir -p {}".format(target_dir)
Expand All @@ -787,6 +793,7 @@
except ValueError as err:
node_list.remove(node)
logger.error(err)
logger.info("Starting %s on %s", constants.PCMK_SERVICE, ', '.join(node_list) or utils.this_node())
return service_manager.start_service("pacemaker.service", enable=enable_flag, node_list=node_list)


Expand Down Expand Up @@ -1393,8 +1400,8 @@
"""
import crmsh.sbd
if _context.stage == "sbd":
crmsh.sbd.clean_up_existing_sbd_resource()
_context.sbd_manager.sbd_init()
crmsh.sbd.cleanup_existing_sbd_resource()

Check warning on line 1403 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L1403

Added line #L1403 was not covered by tests
_context.sbd_manager.init_and_deploy_sbd()


def init_cluster():
Expand All @@ -1419,7 +1426,9 @@
rsc_defaults rsc-options: resource-stickiness=1 migration-threshold=3
""")

_context.sbd_manager.configure_sbd_resource_and_properties()
if ServiceManager().service_is_enabled(constants.SBD_SERVICE):
_context.sbd_manager.configure_sbd()

Check warning on line 1430 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L1430

Added line #L1430 was not covered by tests



def init_admin():
Expand Down Expand Up @@ -2045,8 +2054,7 @@
shell.get_stdout_or_raise_error("rm -f {}".format(' '.join(_context.rm_list)), remote)
# restore original sbd configuration file from /usr/share/fillup-templates/sysconfig.sbd
if utils.package_is_installed("sbd", remote_addr=remote):
from .sbd import SBDManager
cmd = "cp {} {}".format(SBDManager.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD)
cmd = "cp {} {}".format(SBDManager.SYSCONFIG_SBD_TEMPLATE, SBDManager.SYSCONFIG_SBD)
shell.get_stdout_or_raise_error(cmd, remote)


Expand Down Expand Up @@ -2669,8 +2677,7 @@
"""
Adjust stonith-timeout for sbd and other scenarios
"""
if ServiceManager().service_is_active("sbd.service"):
from .sbd import SBDTimeout
if ServiceManager().service_is_active(constants.SBD_SERVICE):
SBDTimeout.adjust_sbd_timeout_related_cluster_configuration()
else:
value = get_stonith_timeout_generally_expected()
Expand Down Expand Up @@ -2733,7 +2740,12 @@
"""
Sync files between cluster nodes
"""
if _context.skip_csync2:
if _context:
skip_csync2 = _context.skip_csync2
else:
skip_csync2 = not ServiceManager().service_is_active(CSYNC2_SERVICE)

Check warning on line 2746 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L2746

Added line #L2746 was not covered by tests

if skip_csync2:
utils.cluster_copy_file(path, nodes=_context.node_list_in_cluster, output=False)
else:
csync2_update(path)
Expand Down
3 changes: 3 additions & 0 deletions crmsh/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,4 +450,7 @@
HIDDEN_COMMANDS = {'ms'}

NO_SSH_ERROR_MSG = "ssh-related operations are disabled. crmsh works in local mode."

PCMK_SERVICE = "pacemaker.service"
SBD_SERVICE = "sbd.service"
# vim:ts=4:sw=4:et:
8 changes: 4 additions & 4 deletions crmsh/qdevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from . import lock
from . import log
from .service_manager import ServiceManager
from .sbd import SBDManager, SBDTimeout, SBDUtils


logger = log.setup_logger(__name__)
Expand Down Expand Up @@ -612,15 +613,14 @@ def adjust_sbd_watchdog_timeout_with_qdevice(self):
"""
Adjust SBD_WATCHDOG_TIMEOUT when configuring qdevice and diskless SBD
"""
from .sbd import SBDManager, SBDTimeout
utils.check_all_nodes_reachable()
self.using_diskless_sbd = SBDManager.is_using_diskless_sbd()
self.using_diskless_sbd = SBDUtils.is_using_diskless_sbd()
# add qdevice after diskless sbd started
if self.using_diskless_sbd:
res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT")
res = SBDUtils.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT")
if not res or int(res) < SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE:
sbd_watchdog_timeout_qdevice = SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE
SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)})
SBDManager.update_sbd_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)})
utils.set_property("stonith-timeout", SBDTimeout.get_stonith_timeout())

@qnetd_lock_for_same_cluster_name
Expand Down
13 changes: 9 additions & 4 deletions crmsh/report/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,16 @@ def collect_sbd_info(context: core.Context) -> None:
return

sbd_f = os.path.join(context.work_dir, constants.SBD_F)
cmd = ". {};export SBD_DEVICE;{};{}".format(constants.SBDCONF, "sbd dump", "sbd list")
cmd_list = [
f". {constants.SBDCONF};export SBD_DEVICE;sbd dump;sbd list",
"crm sbd configure show",
"crm sbd status"
]
with open(sbd_f, "w") as f:
f.write("\n\n#=====[ Command ] ==========================#\n")
f.write(f"# {cmd}\n")
f.write(utils.get_cmd_output(cmd))
for cmd in cmd_list:
f.write("\n\n#=====[ Command ] ==========================#\n")
f.write(f"# {cmd}\n")
f.write(utils.get_cmd_output(cmd))

logger.debug(f"Dump SBD config file into {utils.real_path(sbd_f)}")

Expand Down
2 changes: 1 addition & 1 deletion crmsh/report/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ def get_cmd_output(cmd: str, timeout: int = None) -> str:
out_str += f"{out}\n"
if err:
out_str += f"{err}\n"
return out_str
return crmutils.strip_ansi_escape_sequences(out_str)


def get_timespan_str(context: core.Context) -> str:
Expand Down
Loading