def quorum_unblock_cmd(lib, argv, modifiers):
    """
    Cancel corosync's wait-for-all so the cluster can become quorate.

    Options:
      * --force - no error when removing non existing property and no warning
        about this action
    """
    modifiers.ensure_only_supported("--force")
    if argv:
        raise CmdLineInputError()

    # Only meaningful while corosync is actually waiting for all nodes.
    wfa_output, wfa_retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"]
    )
    if wfa_retval != 0:
        utils.err("unable to check quorum status")
    if wfa_output.split("=")[-1].strip() != "1":
        utils.err("cluster is not waiting for nodes to establish quorum")

    all_nodes, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade()
    )
    if report_list:
        utils.process_library_reports(report_list)
    unjoined_nodes = set(all_nodes) - set(utils.getCorosyncActiveNodes())
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")

    if not modifiers.get("--force"):
        prompt = (
            "WARNING: If node(s) {nodes} are not powered off or they do"
            " have access to shared resources, data corruption and/or"
            " cluster failure may occur. Are you sure you want to"
            " continue? [y/N] "
        ).format(nodes=", ".join(unjoined_nodes))
        if utils.get_terminal_input(prompt).lower() not in ("y", "yes"):
            print("Canceled")
            return

    for unjoined in unjoined_nodes:
        # pass --force so no warning will be displayed
        stonith.stonith_confirm(
            lib, [unjoined], parse_args.InputModifiers({"--force": ""})
        )

    cancel_output, cancel_retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"]
    )
    if cancel_retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print("Quorum unblocked")

    # Write startup-fencing with a flipped value and then restore it; the
    # property's final value is unchanged.
    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true",
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print("Waiting for nodes canceled")
def quorum_unblock_cmd(lib, argv, modifiers):
    """
    Cancel corosync's wait-for-all so the cluster can become quorate.

    Options:
      * --force - no error when removing non existing property and no warning
        about this action
    """
    modifiers.ensure_only_supported("--force")
    if argv:
        raise CmdLineInputError()

    # Only meaningful while corosync is actually waiting for all nodes.
    status_out, status_rc = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"]
    )
    if status_rc != 0:
        utils.err("unable to check quorum status")
    if status_out.rsplit("=", maxsplit=1)[-1].strip() != "1":
        utils.err("cluster is not waiting for nodes to establish quorum")

    all_nodes, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade()
    )
    if report_list:
        process_library_reports(report_list)
    unjoined_nodes = set(all_nodes) - set(utils.getCorosyncActiveNodes())
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")

    confirmed = utils.get_continue_confirmation_or_force(
        f"If node(s) {format_list(unjoined_nodes)} are not powered off or they "
        "do have access to shared resources, data corruption and/or cluster "
        "failure may occur",
        modifiers.get("--force"),
    )
    if not confirmed:
        return

    for unjoined in unjoined_nodes:
        # pass --force so no warning will be displayed
        stonith.stonith_confirm(
            lib, [unjoined], parse_args.InputModifiers({"--force": ""})
        )

    cancel_out, cancel_rc = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"]
    )
    if cancel_rc != 0:
        utils.err("unable to cancel waiting for nodes")
    print_to_stderr("Quorum unblocked")

    # Write startup-fencing with a flipped value and then restore it; the
    # property's final value is unchanged.
    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true",
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print_to_stderr("Waiting for nodes canceled")
def cluster_pcsd_status(lib, argv, modifiers, dont_exit=False):
    """
    Check pcsd status on the listed nodes, or on all configured nodes.

    Options:
      * --request-timeout - HTTP timeout for node authorization check
    """
    modifiers.ensure_only_supported("--request-timeout")
    if argv:
        bad_nodes = check_nodes(argv, " ")
    else:
        # No explicit node list: check every node from corosync.conf.
        nodes = utils.get_corosync_conf_facade().get_nodes_names()
        if not nodes:
            utils.err("no nodes found in corosync.conf")
        bad_nodes = check_nodes(nodes, " ")
    if bad_nodes and not dont_exit:
        sys.exit(2)
def quorum_unblock_cmd(lib, argv, modifiers):
    """
    Cancel corosync's wait-for-all so the cluster can become quorate.

    Options:
      * --force - no error when removing non existing property and no warning
        about this action
    """
    modifiers.ensure_only_supported("--force")
    if len(argv) > 0:
        raise CmdLineInputError()

    output, retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"])
    if retval != 0:
        utils.err("unable to check quorum status")
    if output.split("=")[-1].strip() != "1":
        utils.err("cluster is not waiting for nodes to establish quorum")

    unjoined_nodes = (
        set(utils.get_corosync_conf_facade().get_nodes_names())
        - set(utils.getCorosyncActiveNodes())
    )
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")

    # BUG FIX: the confirmation prompt must be shown when --force is NOT
    # given. The original condition was inverted ("if modifiers.get(...)"),
    # so the data-corruption warning was displayed only to users who
    # explicitly asked to skip it, and everyone else got none.
    if not modifiers.get("--force"):
        answer = utils.get_terminal_input(
            ("WARNING: If node(s) {nodes} are not powered off or they do"
                + " have access to shared resources, data corruption and/or"
                + " cluster failure may occur. Are you sure you want to"
                + " continue? [y/N] ").format(nodes=", ".join(unjoined_nodes)))
        if answer.lower() not in ["y", "yes"]:
            print("Canceled")
            return
    for node in unjoined_nodes:
        # pass --force so no warning will be displayed
        stonith.stonith_confirm(
            lib, [node], parse_args.InputModifiers({"--force": ""}))

    output, retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"])
    if retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print("Quorum unblocked")

    # Write startup-fencing with a flipped value and then restore it; the
    # property's final value is unchanged.
    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true")
    utils.set_cib_property("startup-fencing", startup_fencing)
    print("Waiting for nodes canceled")
def cluster_pcsd_status(lib, argv, modifiers, dont_exit=False):
    """
    Check pcsd status on the listed nodes, or on all configured nodes.

    Options:
      * --request-timeout - HTTP timeout for node authorization check
    """
    del lib
    modifiers.ensure_only_supported("--request-timeout")
    if argv:
        bad_nodes = check_nodes(argv, " ")
    else:
        # No explicit node list: check every node from corosync.conf.
        nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade()
        )
        if not nodes and not dont_exit:
            report_list.append(reports.corosync_config_no_nodes_defined())
        if report_list:
            process_library_reports(report_list)
        bad_nodes = check_nodes(nodes, " ")
    if bad_nodes and not dont_exit:
        sys.exit(2)
def quorum_unblock_cmd(argv):
    """Cancel corosync's wait-for-all so the cluster can become quorate."""
    if argv:
        usage.quorum(["unblock"])
        sys.exit(1)

    # Only meaningful while corosync is actually waiting for all nodes.
    wfa_output, wfa_retval = utils.run(
        ["corosync-cmapctl", "-g", "runtime.votequorum.wait_for_all_status"]
    )
    if wfa_retval != 0:
        utils.err("unable to check quorum status")
    if wfa_output.split("=")[-1].strip() != "1":
        utils.err("cluster is not waiting for nodes to establish quorum")

    configured = set(utils.get_corosync_conf_facade().get_nodes_names())
    active = set(utils.getCorosyncActiveNodes())
    unjoined_nodes = configured - active
    if not unjoined_nodes:
        utils.err("no unjoined nodes found")

    if "--force" not in utils.pcs_options:
        prompt = (
            "WARNING: If node(s) {nodes} are not powered off or they do"
            " have access to shared resources, data corruption and/or"
            " cluster failure may occur. Are you sure you want to"
            " continue? [y/N] "
        ).format(nodes=", ".join(unjoined_nodes))
        if utils.get_terminal_input(prompt).lower() not in ("y", "yes"):
            print("Canceled")
            return

    for unjoined in unjoined_nodes:
        stonith.stonith_confirm([unjoined], skip_question=True)

    cancel_output, cancel_retval = utils.run(
        ["corosync-cmapctl", "-s", "quorum.cancel_wait_for_all", "u8", "1"]
    )
    if cancel_retval != 0:
        utils.err("unable to cancel waiting for nodes")
    print("Quorum unblocked")

    # Write startup-fencing with a flipped value and then restore it; the
    # property's final value is unchanged.
    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
    utils.set_cib_property(
        "startup-fencing",
        "false" if startup_fencing.lower() != "false" else "true",
    )
    utils.set_cib_property("startup-fencing", startup_fencing)
    print("Waiting for nodes canceled")
def nodes_status(lib, argv, modifiers):
    """
    Print node lists and node states of the cluster.

    Options:
      * -f - CIB file - for config subcommand and not for both or corosync
      * --corosync_conf - only for config subcommand
    NOTE: modifiers check is in subcommand
    """
    del lib
    # "config": print only the node names known to corosync and to pacemaker
    if len(argv) == 1 and (argv[0] == "config"):
        modifiers.ensure_only_supported("-f", "--corosync_conf")
        if utils.hasCorosyncConf():
            corosync_nodes, report_list = get_existing_nodes_names(
                utils.get_corosync_conf_facade())
            if report_list:
                process_library_reports(report_list)
        else:
            corosync_nodes = []
        try:
            # Full cluster nodes only; pacemaker-remote nodes are skipped here.
            pacemaker_nodes = sorted([
                node.attrs.name for node in ClusterState(
                    get_cluster_status_dom(
                        utils.cmd_runner())).node_section.nodes
                if node.attrs.type != "remote"
            ])
        except LibraryError as e:
            process_library_reports(e.args)
        print("Corosync Nodes:")
        if corosync_nodes:
            print(" " + " ".join(corosync_nodes))
        print("Pacemaker Nodes:")
        if pacemaker_nodes:
            print(" " + " ".join(pacemaker_nodes))
        return

    # "corosync" / "both": online/offline status from corosync membership
    if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"):
        modifiers.ensure_only_supported()
        all_nodes, report_list = get_existing_nodes_names(
            utils.get_corosync_conf_facade())
        if report_list:
            process_library_reports(report_list)
        online_nodes = utils.getCorosyncActiveNodes()
        offline_nodes = []
        for node in all_nodes:
            if node not in online_nodes:
                offline_nodes.append(node)
        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print(" ".join([" Online:"] + online_nodes))
        print(" ".join([" Offline:"] + offline_nodes))
        # "corosync" stops here; "both" falls through to the pacemaker view
        if argv[0] != "both":
            sys.exit(0)

    # Pacemaker view: classify every node from the cluster state XML.
    modifiers.ensure_only_supported("-f")
    info_dom = utils.getClusterState()
    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        utils.err("No nodes section found")

    onlinenodes = []
    offlinenodes = []
    standbynodes = []
    standbynodes_with_resources = []
    maintenancenodes = []
    remote_onlinenodes = []
    remote_offlinenodes = []
    remote_standbynodes = []
    remote_standbynodes_with_resources = []
    remote_maintenancenodes = []
    for node in nodes[0].getElementsByTagName("node"):
        node_name = node.getAttribute("name")
        node_remote = node.getAttribute("type") == "remote"
        if node.getAttribute("online") == "true":
            # standby and maintenance are checked independently (not elif),
            # so a node that is both appears in both groups; "Online" is only
            # for nodes that are neither
            if node.getAttribute("standby") == "true":
                is_running_resources = (
                    node.getAttribute("resources_running") != "0")
                if node_remote:
                    if is_running_resources:
                        remote_standbynodes_with_resources.append(node_name)
                    else:
                        remote_standbynodes.append(node_name)
                else:
                    if is_running_resources:
                        standbynodes_with_resources.append(node_name)
                    else:
                        standbynodes.append(node_name)
            if node.getAttribute("maintenance") == "true":
                if node_remote:
                    remote_maintenancenodes.append(node_name)
                else:
                    maintenancenodes.append(node_name)
            if (node.getAttribute("standby") == "false"
                    and node.getAttribute("maintenance") == "false"):
                if node_remote:
                    remote_onlinenodes.append(node_name)
                else:
                    onlinenodes.append(node_name)
        else:
            if node_remote:
                remote_offlinenodes.append(node_name)
            else:
                offlinenodes.append(node_name)

    print("Pacemaker Nodes:")
    print(" ".join([" Online:"] + onlinenodes))
    print(" ".join([" Standby:"] + standbynodes))
    print(" ".join(
        [" Standby with resource(s) running:"] + standbynodes_with_resources))
    print(" ".join([" Maintenance:"] + maintenancenodes))
    print(" ".join([" Offline:"] + offlinenodes))
    print("Pacemaker Remote Nodes:")
    print(" ".join([" Online:"] + remote_onlinenodes))
    print(" ".join([" Standby:"] + remote_standbynodes))
    print(" ".join(
        [" Standby with resource(s) running:"]
        + remote_standbynodes_with_resources))
    print(" ".join([" Maintenance:"] + remote_maintenancenodes))
    print(" ".join([" Offline:"] + remote_offlinenodes))
def config_restore_remote(infile_name, infile_obj):
    """
    Restore a cluster configuration backup tarball on all cluster nodes.

    Commandline options:
      * --request-timeout - timeout for HTTP requests
    """
    # Extract only the metadata files needed for validation from the tarball.
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            if tar_member_info.name in extracted:
                tar_member = tarball.extractfile(tar_member_info)
                extracted[tar_member_info.name] = tar_member.read()
                tar_member.close()
        tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    # The node list comes from the corosync.conf stored inside the backup,
    # not from the local cluster configuration.
    node_list, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade(
            conf_text=extracted["corosync.conf"].decode("utf-8")))
    if report_list:
        process_library_reports(report_list)
    if not node_list:
        utils.err("no nodes found in the tarball")

    # Refuse to restore if any node reports cluster daemons running.
    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            _status = json.loads(output)
            if (_status["corosync"]
                    or _status["pacemaker"]
                    or
                    # not supported by older pcsd, do not fail if not present
                    _status.get("pacemaker_remote", False)):
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration."
                    % node)
                continue
        except (ValueError, NameError, LookupError):
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or "(HTTP error: 404)" in output):
            utils.err(output)

    # Re-read the whole tarball as raw bytes to send to every node.
    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        with open(infile_name, "rb") as tarball:
            tarball_data = tarball.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
def nodes_status(argv):
    """Print node lists and node states of the cluster."""
    # "config": print only the node names known to corosync and to pacemaker
    if len(argv) == 1 and (argv[0] == "config"):
        if utils.hasCorosyncConf():
            corosync_nodes = utils.get_corosync_conf_facade().get_nodes_names()
        else:
            corosync_nodes = []
        try:
            # Full cluster nodes only; pacemaker-remote nodes are skipped here.
            pacemaker_nodes = sorted([
                node.attrs.name for node in ClusterState(
                    utils.getClusterStateXml()).node_section.nodes
                if node.attrs.type != 'remote'
            ])
        except LibraryError as e:
            utils.process_library_reports(e.args)
        print("Corosync Nodes:")
        if corosync_nodes:
            print(" " + " ".join(corosync_nodes))
        print("Pacemaker Nodes:")
        if pacemaker_nodes:
            print(" " + " ".join(pacemaker_nodes))
        return

    # "corosync" / "both": online/offline status from corosync membership
    if len(argv) == 1 and (argv[0] == "corosync" or argv[0] == "both"):
        all_nodes = utils.get_corosync_conf_facade().get_nodes_names()
        online_nodes = utils.getCorosyncActiveNodes()
        offline_nodes = []
        for node in all_nodes:
            if node not in online_nodes:
                offline_nodes.append(node)
        online_nodes.sort()
        offline_nodes.sort()
        print("Corosync Nodes:")
        print(" ".join([" Online:"] + online_nodes))
        print(" ".join([" Offline:"] + offline_nodes))
        # "corosync" stops here; "both" falls through to the pacemaker view
        if argv[0] != "both":
            sys.exit(0)

    # Pacemaker view: classify every node from the cluster state XML.
    info_dom = utils.getClusterState()
    nodes = info_dom.getElementsByTagName("nodes")
    if nodes.length == 0:
        utils.err("No nodes section found")

    onlinenodes = []
    offlinenodes = []
    standbynodes = []
    maintenancenodes = []
    remote_onlinenodes = []
    remote_offlinenodes = []
    remote_standbynodes = []
    remote_maintenancenodes = []
    for node in nodes[0].getElementsByTagName("node"):
        node_name = node.getAttribute("name")
        node_remote = node.getAttribute("type") == "remote"
        if node.getAttribute("online") == "true":
            # standby takes precedence over maintenance in this variant
            if node.getAttribute("standby") == "true":
                if node_remote:
                    remote_standbynodes.append(node_name)
                else:
                    standbynodes.append(node_name)
            elif node.getAttribute("maintenance") == "true":
                if node_remote:
                    remote_maintenancenodes.append(node_name)
                else:
                    maintenancenodes.append(node_name)
            else:
                if node_remote:
                    remote_onlinenodes.append(node_name)
                else:
                    onlinenodes.append(node_name)
        else:
            if node_remote:
                remote_offlinenodes.append(node_name)
            else:
                offlinenodes.append(node_name)

    print("Pacemaker Nodes:")
    print(" ".join([" Online:"] + onlinenodes))
    print(" ".join([" Standby:"] + standbynodes))
    print(" ".join([" Maintenance:"] + maintenancenodes))
    print(" ".join([" Offline:"] + offlinenodes))
    print("Pacemaker Remote Nodes:")
    print(" ".join([" Online:"] + remote_onlinenodes))
    print(" ".join([" Standby:"] + remote_standbynodes))
    print(" ".join([" Maintenance:"] + remote_maintenancenodes))
    print(" ".join([" Offline:"] + remote_offlinenodes))
def config_restore_remote(infile_name, infile_obj):
    """
    Restore a cluster configuration backup tarball on all cluster nodes.

    Commandline options:
      * --request-timeout - timeout for HTTP requests
    """
    # Extract only the metadata files needed for validation from the tarball.
    extracted = {
        "version.txt": "",
        "corosync.conf": "",
    }
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            if tar_member_info.name in extracted:
                tar_member = tarball.extractfile(tar_member_info)
                extracted[tar_member_info.name] = tar_member.read()
                tar_member.close()
        tarball.close()
    except (tarfile.TarError, EnvironmentError) as e:
        utils.err("unable to read the tarball: %s" % e)

    config_backup_check_version(extracted["version.txt"])

    # The node list comes from the corosync.conf stored inside the backup,
    # not from the local cluster configuration.
    node_list, report_list = get_existing_nodes_names(
        utils.get_corosync_conf_facade(
            conf_text=extracted["corosync.conf"].decode("utf-8")
        )
    )
    if report_list:
        utils.process_library_reports(report_list)
    if not node_list:
        utils.err("no nodes found in the tarball")

    # Refuse to restore if any node reports cluster daemons running.
    err_msgs = []
    for node in node_list:
        try:
            retval, output = utils.checkStatus(node)
            if retval != 0:
                err_msgs.append(output)
                continue
            _status = json.loads(output)
            if (
                _status["corosync"]
                or
                _status["pacemaker"]
                or
                # not supported by older pcsd, do not fail if not present
                _status.get("pacemaker_remote", False)
            ):
                err_msgs.append(
                    "Cluster is currently running on node %s. You need to stop "
                    "the cluster in order to restore the configuration."
                    % node
                )
                continue
        except (ValueError, NameError, LookupError):
            err_msgs.append("unable to determine status of the node %s" % node)
    if err_msgs:
        for msg in err_msgs:
            utils.err(msg, False)
        sys.exit(1)

    # Temporarily disable config files syncing thread in pcsd so it will not
    # rewrite restored files. 10 minutes should be enough time to restore.
    # If node returns HTTP 404 it does not support config syncing at all.
    for node in node_list:
        retval, output = utils.pauseConfigSyncing(node, 10 * 60)
        if not (retval == 0 or "(HTTP error: 404)" in output):
            utils.err(output)

    # Re-read the whole tarball as raw bytes to send to every node.
    if infile_obj:
        infile_obj.seek(0)
        tarball_data = infile_obj.read()
    else:
        with open(infile_name, "rb") as tarball:
            tarball_data = tarball.read()

    error_list = []
    for node in node_list:
        retval, error = utils.restoreConfig(node, tarball_data)
        if retval != 0:
            error_list.append(error)
    if error_list:
        utils.err("unable to restore all nodes\n" + "\n".join(error_list))
def cluster_destroy(argv):
    """
    Destroy the cluster configuration on the local node, or on all nodes
    (full-stack plus remote/guest nodes) when --all is given.
    """
    if argv:
        raise CmdLineInputError()
    if "--all" in utils.pcs_options:
        # destroy remote and guest nodes
        cib = None
        lib_env = utils.get_lib_env()
        try:
            cib = lib_env.get_cib()
        except LibraryError:
            warn("Unable to load CIB to get guest and remote nodes from it, "
                "those nodes will not be deconfigured.")
        if cib is not None:
            try:
                # BUG FIX: get_existing_nodes_names returns a
                # (names, reports) pair; the original code assigned the
                # whole tuple to all_remote_nodes, so the "node list" was
                # always a non-empty 2-tuple and the wrong object was
                # passed on for deconfiguration.
                all_remote_nodes, dummy_report_list = get_existing_nodes_names(
                    cib=cib)
                if len(all_remote_nodes) > 0:
                    _destroy_pcmk_remote_env(lib_env, all_remote_nodes,
                        skip_offline_nodes=True, allow_fails=True)
            except LibraryError as e:
                utils.process_library_reports(e.args)
        # destroy full-stack nodes
        destroy_cluster(utils.get_corosync_conf_facade().get_nodes_names())
    else:
        print("Shutting down pacemaker/corosync services...")
        for service in ["pacemaker", "corosync-qdevice", "corosync"]:
            # Returns an error if a service is not running. It is safe to
            # ignore it since we want it not to be running anyways.
            utils.stop_service(service)
        print("Killing any remaining services...")
        kill_local_cluster_services()
        try:
            utils.disableServices()
        except:
            # previously errors were suppressed in here, let's keep it that way
            # for now
            pass
        try:
            disable_service(utils.cmd_runner(), lib_sbd.get_sbd_service_name())
        except:
            # it's not a big deal if sbd disable fails
            pass

        print("Removing all cluster configuration files...")
        dummy_output, dummy_retval = utils.run([
            "rm", "-f",
            settings.corosync_conf_file,
            settings.corosync_authkey_file,
            settings.pacemaker_authkey_file,
        ])
        # Wipe pacemaker's persistent state (CIB, policy engine inputs, ...)
        state_files = [
            "cib.xml*", "cib-*", "core.*", "hostcache", "cts.*",
            "pe*.bz2", "cib.*"
        ]
        for name in state_files:
            dummy_output, dummy_retval = utils.run([
                "find", "/var/lib/pacemaker", "-name", name,
                "-exec", "rm", "-f", "{}", ";"
            ])
        try:
            qdevice_net.client_destroy()
        except:
            # errors from deleting other files are suppressed as well
            # we do not want to fail if qdevice was not set up
            pass
def cluster_cmd(argv):
    """
    Dispatch a "pcs cluster ..." subcommand to its implementation.

    argv[0] is the subcommand name; the rest is forwarded to the handler.
    Unknown subcommands print usage and exit with status 1.
    """
    if len(argv) == 0:
        usage.cluster()
        exit(1)

    sub_cmd = argv.pop(0)
    if (sub_cmd == "help"):
        usage.cluster([" ".join(argv)] if argv else [])
    elif (sub_cmd == "setup"):
        try:
            cluster_setup(
                utils.get_library_wrapper(), argv, utils.get_modifiers())
        except LibraryError as e:
            process_library_reports(e.args)
        except CmdLineInputError as e:
            utils.exit_on_cmdline_input_errror(e, "cluster", sub_cmd)
    elif (sub_cmd == "sync"):
        sync_nodes(
            utils.get_corosync_conf_facade().get_nodes_names(),
            utils.getCorosyncConf())
    elif (sub_cmd == "status"):
        status.cluster_status(argv)
    elif (sub_cmd == "pcsd-status"):
        status.cluster_pcsd_status(argv)
    elif (sub_cmd == "certkey"):
        cluster_certkey(argv)
    elif (sub_cmd == "auth"):
        try:
            cluster_auth_cmd(
                utils.get_library_wrapper(), argv, utils.get_modifiers())
        except LibraryError as e:
            process_library_reports(e.args)
        except CmdLineInputError as e:
            utils.exit_on_cmdline_input_errror(e, "cluster", sub_cmd)
    elif (sub_cmd == "start"):
        # --all is mutually exclusive with an explicit node list
        if "--all" in utils.pcs_options:
            if argv:
                utils.err(ERR_NODE_LIST_AND_ALL_MUTUALLY_EXCLUSIVE)
            start_cluster_all()
        else:
            start_cluster(argv)
    elif (sub_cmd == "stop"):
        if "--all" in utils.pcs_options:
            if argv:
                utils.err(ERR_NODE_LIST_AND_ALL_MUTUALLY_EXCLUSIVE)
            stop_cluster_all()
        else:
            stop_cluster(argv)
    elif (sub_cmd == "kill"):
        kill_cluster(argv)
    elif (sub_cmd == "standby"):
        # standby/unstandby share one implementation toggled by a bool flag
        try:
            node.node_standby_cmd(
                utils.get_library_wrapper(), argv, utils.get_modifiers(), True)
        except LibraryError as e:
            utils.process_library_reports(e.args)
        except CmdLineInputError as e:
            utils.exit_on_cmdline_input_errror(e, "node", "standby")
    elif (sub_cmd == "unstandby"):
        try:
            node.node_standby_cmd(
                utils.get_library_wrapper(), argv, utils.get_modifiers(), False)
        except LibraryError as e:
            utils.process_library_reports(e.args)
        except CmdLineInputError as e:
            utils.exit_on_cmdline_input_errror(e, "node", "unstandby")
    elif (sub_cmd == "enable"):
        if "--all" in utils.pcs_options:
            if argv:
                utils.err(ERR_NODE_LIST_AND_ALL_MUTUALLY_EXCLUSIVE)
            enable_cluster_all()
        else:
            enable_cluster(argv)
    elif (sub_cmd == "disable"):
        if "--all" in utils.pcs_options:
            if argv:
                utils.err(ERR_NODE_LIST_AND_ALL_MUTUALLY_EXCLUSIVE)
            disable_cluster_all()
        else:
            disable_cluster(argv)
    elif (sub_cmd == "remote-node"):
        try:
            cluster_remote_node(argv)
        except LibraryError as e:
            utils.process_library_reports(e.args)
    elif (sub_cmd == "cib"):
        get_cib(argv)
    elif (sub_cmd == "cib-push"):
        cluster_push(argv)
    elif (sub_cmd == "cib-upgrade"):
        utils.cluster_upgrade()
    elif (sub_cmd == "edit"):
        cluster_edit(argv)
    elif (sub_cmd == "node"):
        # second-level dispatch for "pcs cluster node <action>"
        node_command_map = {
            "add": node_add,
            "add-guest": cluster_command.node_add_guest,
            "add-outside": node_add_outside_cluster,
            "add-remote": cluster_command.node_add_remote,
            "clear": cluster_command.node_clear,
            "remove": node_remove,
            "remove-guest": cluster_command.node_remove_guest,
            "remove-remote": cluster_command.create_node_remove_remote(
                resource.resource_remove),
        }
        if argv and argv[0] in node_command_map:
            try:
                node_command_map[argv[0]](
                    utils.get_library_wrapper(), argv[1:],
                    utils.get_modifiers())
            except LibraryError as e:
                process_library_reports(e.args)
            except CmdLineInputError as e:
                utils.exit_on_cmdline_input_errror(
                    e, "cluster", "node " + argv[0])
        else:
            usage.cluster(["node"])
            sys.exit(1)
    elif (sub_cmd == "uidgid"):
        cluster_uidgid(argv)
    elif (sub_cmd == "corosync"):
        cluster_get_corosync_conf(argv)
    elif (sub_cmd == "reload"):
        cluster_reload(argv)
    elif (sub_cmd == "destroy"):
        try:
            cluster_destroy(argv)
        except CmdLineInputError as e:
            utils.exit_on_cmdline_input_errror(e, "cluster", sub_cmd)
    elif (sub_cmd == "verify"):
        cluster_verify(argv)
    elif (sub_cmd == "report"):
        cluster_report(argv)
    elif (sub_cmd == "quorum"):
        # only "quorum unblock" is supported here
        if argv and argv[0] == "unblock":
            quorum.quorum_unblock_cmd(argv[1:])
        else:
            usage.cluster()
            sys.exit(1)
    elif (sub_cmd == "remove_nodes_from_cib"):
        try:
            remove_nodes_from_cib(
                utils.get_library_wrapper(),
                argv,
                utils.get_modifiers(),
            )
        except LibraryError as e:
            process_library_reports(e.args)
        except CmdLineInputError as e:
            utils.exit_on_cmdline_input_errror(e, "cluster", sub_cmd)
    else:
        usage.cluster()
        sys.exit(1)
def disable_cluster_all():
    """Disable cluster services on every node listed in corosync.conf."""
    node_names = utils.get_corosync_conf_facade().get_nodes_names()
    disable_cluster_nodes(node_names)
def stop_cluster_nodes(nodes):
    """
    Stop cluster daemons (pacemaker, then corosync) on the given nodes,
    refusing to proceed if that would cost the cluster its quorum, unless
    --force is given.
    """
    all_nodes = utils.get_corosync_conf_facade().get_nodes_names()
    unknown_nodes = set(nodes) - set(all_nodes)
    if unknown_nodes:
        utils.err(
            "nodes '%s' do not appear to exist in configuration"
            % "', '".join(unknown_nodes))

    # Quorum-loss checking is pointless when the whole cluster is stopped.
    stopping_all = set(nodes) >= set(all_nodes)
    if "--force" not in utils.pcs_options and not stopping_all:
        error_list = []
        for node in nodes:
            retval, data = utils.get_remote_quorumtool_output(node)
            if retval != 0:
                error_list.append(node + ": " + data)
                continue
            try:
                quorum_status = corosync_live.QuorumStatus.from_string(data)
                if not quorum_status.is_quorate:
                    # Get quorum status from a quorate node, non-quorate nodes
                    # may provide inaccurate info. If no node is quorate, there
                    # is no quorum to be lost and therefore no error to be
                    # reported.
                    continue
                if quorum_status.stopping_nodes_cause_quorum_loss(nodes):
                    utils.err(
                        "Stopping the node(s) will cause a loss of the quorum"
                        + ", use --force to override")
                else:
                    # We have the info, no need to print errors
                    error_list = []
                    break
            except corosync_live.QuorumStatusException:
                if not utils.is_node_offline_by_quorumtool_output(data):
                    error_list.append(node + ": Unable to get quorum status")
                # else the node seems to be stopped already
        if error_list:
            utils.err(
                "Unable to determine whether stopping the nodes will cause "
                + "a loss of the quorum, use --force to override\n"
                + "\n".join(error_list))

    was_error = False
    # First stop pacemaker on all nodes in parallel...
    node_errors = parallel_for_nodes(
        utils.repeat_if_timeout(utils.stopPacemaker), nodes, quiet=True)
    accessible_nodes = [
        node for node in nodes if node not in node_errors.keys()
    ]
    if node_errors:
        # Keep going if at least one node is reachable; only exit immediately
        # when no node can be reached at all.
        utils.err(
            "unable to stop all nodes\n" + "\n".join(node_errors.values()),
            exit_after_error=not accessible_nodes)
        was_error = True
    for node in node_errors:
        print("{0}: Not stopping cluster - node is unreachable".format(node))

    # ...then stop corosync on the nodes that were reachable.
    node_errors = parallel_for_nodes(
        utils.stopCorosync, accessible_nodes, quiet=True)
    if node_errors:
        utils.err(
            "unable to stop all nodes\n" + "\n".join(node_errors.values()))
    if was_error:
        utils.err("unable to stop all nodes")
def stop_cluster_all():
    """Stop cluster services on every node listed in corosync.conf."""
    node_names = utils.get_corosync_conf_facade().get_nodes_names()
    stop_cluster_nodes(node_names)