示例#1
0
文件: status.py 项目: mbaldessari/pcs
def _get_local_services_status(
    service_manager: ServiceManagerInterface,
) -> List[_ServiceStatus]:
    """
    Collect the enabled / running state of the cluster services on this node

    service_manager -- used to resolve the SBD service name and to ask whether
        each service is enabled and running

    A service is left out of the result when building its status raises
    LibraryError.
    """
    # (service name, display even if not enabled nor running)
    watched_services = (
        ("corosync", True),
        ("pacemaker", True),
        ("pacemaker_remote", False),
        ("pcsd", True),
        (get_sbd_service_name(service_manager), False),
    )
    collected = []
    for name, always_shown in watched_services:
        try:
            enabled = service_manager.is_enabled(name)
            running = service_manager.is_running(name)
            status = _ServiceStatus(name, always_shown, enabled, running)
        except LibraryError:
            # best effort: a service whose state cannot be obtained is skipped
            continue
        collected.append(status)
    return collected
示例#2
0
def status_stonith_check():
    """
    Print warnings about questionable stonith settings found in the CIB

    Three situations are reported:
      * stonith is not disabled, yet there is no stonith device and the SBD
        service is not running
      * a stonith device has the 'action' option set
      * a stonith device has the 'method' option set to 'cycle'
    The SBD service is only queried when utils.usefile is not set.
    """
    # We should read the default value from pacemaker. However that may slow
    # pcs down as we need to run 'pengine metadata' to get it.
    fencing_enabled = True
    fence_devices = []
    ids_with_action = []
    ids_with_method_cycle = []
    sbd_active = False

    cib_dom = utils.get_cib_dom()
    for configuration in cib_dom.getElementsByTagName("configuration"):
        for crm_config in configuration.getElementsByTagName("crm_config"):
            explicitly_disabled = any(
                prop.getAttribute("name") == "stonith-enabled"
                and is_false(prop.getAttribute("value"))
                for prop in crm_config.getElementsByTagName("nvpair")
            )
            if explicitly_disabled:
                fencing_enabled = False
            # once stonith is known to be disabled, stop scanning crm_config
            if not fencing_enabled:
                break
        for primitive in configuration.getElementsByTagName("primitive"):
            if primitive.getAttribute("class") != "stonith":
                continue
            fence_devices.append(primitive)
            device_id = primitive.getAttribute("id")
            attribute_sets = primitive.getElementsByTagName(
                "instance_attributes"
            )
            for attribute_set in attribute_sets:
                for option in attribute_set.getElementsByTagName("nvpair"):
                    option_name = option.getAttribute("name")
                    option_value = option.getAttribute("value")
                    if option_name == "action" and option_value:
                        ids_with_action.append(device_id)
                    elif option_name == "method" and option_value == "cycle":
                        ids_with_method_cycle.append(device_id)

    if not utils.usefile:
        # check if SBD daemon is running
        try:
            sbd_active = utils.is_service_running(
                utils.cmd_runner(), get_sbd_service_name()
            )
        except LibraryError:
            # unable to tell, treat SBD as not running
            pass

    if fencing_enabled and not (fence_devices or sbd_active):
        print("WARNING: no stonith devices and stonith-enabled is not false")

    if ids_with_action:
        replacements = ", ".join(
            ["'{0}'".format(x) for x in _STONITH_ACTION_REPLACED_BY]
        )
        affected_devices = ", ".join(sorted(ids_with_action))
        print(
            "WARNING: following stonith devices have the 'action' option set, "
            "it is recommended to set {0} instead: {1}".format(
                replacements, affected_devices
            )
        )
    if ids_with_method_cycle:
        affected_devices = ", ".join(sorted(ids_with_method_cycle))
        print(
            "WARNING: following stonith devices have the 'method' option set "
            "to 'cycle' which is potentially dangerous, please consider using "
            "'onoff': {0}".format(affected_devices)
        )
示例#3
0
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Build the complete cluster status report as one plaintext string

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info (tickets and, when live and
        corosync.conf is available, pcsd reachability of the nodes)

    Raises LibraryError when only one of CIB / corosync.conf is live or when
    the report processor holds errors.
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements

    # validation: CIB and corosync.conf must be both live or both not live
    cib_is_live = env.is_cib_live
    corosync_is_live = env.is_corosync_conf_live
    if corosync_is_live and not cib_is_live:
        inconsistency = (
            [file_type_codes.CIB],
            [file_type_codes.COROSYNC_CONF],
        )
    elif cib_is_live and not corosync_is_live:
        inconsistency = (
            [file_type_codes.COROSYNC_CONF],
            [file_type_codes.CIB],
        )
    else:
        inconsistency = None
    if inconsistency is not None:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(*inconsistency)
            )
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = env.report_processor
    live = cib_is_live and corosync_is_live
    sbd_active = False

    # load status, cib, corosync.conf
    status_text, status_warnings = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    # If we are live on a remote node, we have no corosync.conf.
    # TODO Use the new file framework so the path is not exposed.
    corosync_conf = (
        env.get_corosync_conf()
        if not live or os.path.exists(settings.corosync_conf_file)
        else None
    )
    cib = env.get_cib()
    if verbose:
        ticket_text, ticket_stderr, ticket_retval = get_ticket_status_text(
            runner
        )
    # get extra info if live
    if live:
        try:
            sbd_active = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            # unable to tell, treat SBD as not running
            pass
        services_status = _get_local_services_status(runner)
        if verbose and corosync_conf:
            node_names, node_names_reports = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(node_names_reports)
            reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_names,
            )

    # check stonith configuration
    all_warnings = [*status_warnings, *_stonith_warnings(cib, sbd_active)]

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    if corosync_conf:
        cluster_name = corosync_conf.get_cluster_name()
    else:
        # no corosync.conf at hand, fall back to the CIB cluster property
        cluster_name = nvpair.get_value(
            "cluster_property_set", get_crm_config(cib), "cluster-name", ""
        )

    lines = [f"Cluster name: {cluster_name}"]
    if all_warnings:
        lines.extend(["", "WARNINGS:", *all_warnings, ""])
    lines.append(status_text)
    if verbose:
        lines.extend(["", "Tickets:"])
        if ticket_retval == 0:
            lines.extend(indent(ticket_text.splitlines()))
        else:
            ticket_problem = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_stderr:
                ticket_problem.extend(indent(ticket_stderr.splitlines()))
            lines.extend(indent(ticket_problem))
    if live:
        if verbose and corosync_conf:
            lines.extend(["", "PCSD Status:"])
            lines.extend(
                indent(_format_node_reachability(node_names, reachability))
            )
        lines.extend(["", "Daemon Status:"])
        lines.extend(indent(_format_local_services_status(services_status)))
    return "\n".join(lines)
示例#4
0
 def test_not_systemctl(self, mock_is_systemctl):
     # With the mocked check returning False, "sbd_helper" is the expected
     # service name and the check has to be called exactly once, no arguments.
     mock_is_systemctl.return_value = False
     service_name = lib_sbd.get_sbd_service_name()
     self.assertEqual("sbd_helper", service_name)
     mock_is_systemctl.assert_called_once_with()
示例#5
0
 def test_not_systemctl(self, mock_is_systemctl):
     # The mocked check reports False, so the helper service name
     # "sbd_helper" is expected; the check must be called once without args.
     mock_is_systemctl.return_value = False
     reported_name = lib_sbd.get_sbd_service_name()
     self.assertEqual("sbd_helper", reported_name)
     mock_is_systemctl.assert_called_once_with()
示例#6
0
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Build the complete cluster status report as one plaintext string

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info (tickets and, when live, pcsd
        reachability of the nodes)

    Raises LibraryError when only one of CIB / corosync.conf is live or when
    the report processor holds errors.
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals

    # validation: CIB and corosync.conf must be both live or both not live
    cib_is_live = env.is_cib_live
    corosync_is_live = env.is_corosync_conf_live
    if corosync_is_live and not cib_is_live:
        inconsistency = (
            [file_type_codes.CIB],
            [file_type_codes.COROSYNC_CONF],
        )
    elif cib_is_live and not corosync_is_live:
        inconsistency = (
            [file_type_codes.COROSYNC_CONF],
            [file_type_codes.CIB],
        )
    else:
        inconsistency = None
    if inconsistency is not None:
        raise LibraryError(
            reports.live_environment_not_consistent(*inconsistency)
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = SimpleReportProcessor(env.report_processor)
    live = cib_is_live and corosync_is_live
    sbd_active = False

    # load status, cib, corosync.conf
    status_text, status_warnings = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        ticket_text, ticket_stderr, ticket_retval = get_ticket_status_text(
            runner
        )
    # get extra info if live
    if live:
        try:
            sbd_active = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            # unable to tell, treat SBD as not running
            pass
        services_status = _get_local_services_status(runner)
        if verbose:
            node_names, node_names_reports = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(node_names_reports)
            reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_names,
            )

    # check stonith configuration
    all_warnings = [*status_warnings, *_stonith_warnings(cib, sbd_active)]

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    lines = [f"Cluster name: {corosync_conf.get_cluster_name()}"]
    if all_warnings:
        lines.extend(["", "WARNINGS:", *all_warnings, ""])
    lines.append(status_text)
    if verbose:
        lines.extend(["", "Tickets:"])
        if ticket_retval == 0:
            lines.extend(indent(ticket_text.splitlines()))
        else:
            ticket_problem = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_stderr:
                ticket_problem.extend(indent(ticket_stderr.splitlines()))
            lines.extend(indent(ticket_problem))
    if live:
        if verbose:
            lines.extend(["", "PCSD Status:"])
            lines.extend(
                indent(_format_node_reachability(node_names, reachability))
            )
        lines.extend(["", "Daemon Status:"])
        lines.extend(indent(_format_local_services_status(services_status)))
    return "\n".join(lines)
示例#7
0
def status_stonith_check(modifiers):
    """
    Return a list of warnings about questionable stonith settings in the CIB

    Three situations are reported:
      * stonith is not disabled, yet there is no stonith device and the SBD
        service is not running
      * a stonith device has the 'action' option set
      * a stonith device has the 'method' option set to 'cycle'

    Commandline options:
      * -f - CIB file, to get stonith devices and cluster property
        stonith-enabled from CIB, to determine whenever we are working with
        files or cluster
    """
    # pylint: disable=too-many-nested-blocks
    # We should read the default value from pacemaker. However that may slow
    # pcs down as we need to run 'pacemaker-schedulerd metadata' to get it.
    found_warnings = []
    fencing_enabled = True
    fence_devices = []
    ids_with_action = []
    ids_with_method_cycle = []
    sbd_active = False

    cib_dom = utils.get_cib_dom()
    for configuration in cib_dom.getElementsByTagName("configuration"):
        for crm_config in configuration.getElementsByTagName("crm_config"):
            explicitly_disabled = any(
                prop.getAttribute("name") == "stonith-enabled"
                and is_false(prop.getAttribute("value"))
                for prop in crm_config.getElementsByTagName("nvpair")
            )
            if explicitly_disabled:
                fencing_enabled = False
            # once stonith is known to be disabled, stop scanning crm_config
            if not fencing_enabled:
                break
        for primitive in configuration.getElementsByTagName("primitive"):
            if primitive.getAttribute("class") != "stonith":
                continue
            fence_devices.append(primitive)
            device_id = primitive.getAttribute("id")
            attribute_sets = primitive.getElementsByTagName(
                "instance_attributes"
            )
            for attribute_set in attribute_sets:
                for option in attribute_set.getElementsByTagName("nvpair"):
                    option_name = option.getAttribute("name")
                    option_value = option.getAttribute("value")
                    if option_name == "action" and option_value:
                        ids_with_action.append(device_id)
                    elif option_name == "method" and option_value == "cycle":
                        ids_with_method_cycle.append(device_id)

    if not modifiers.is_specified("-f"):
        # check if SBD daemon is running
        try:
            sbd_active = utils.is_service_running(
                utils.cmd_runner(), get_sbd_service_name()
            )
        except LibraryError:
            # unable to tell, treat SBD as not running
            pass

    if fencing_enabled and not (fence_devices or sbd_active):
        found_warnings.append(
            "No stonith devices and stonith-enabled is not false"
        )

    if ids_with_action:
        replacements = ", ".join(
            ["'{0}'".format(x) for x in _STONITH_ACTION_REPLACED_BY]
        )
        affected_devices = ", ".join(sorted(ids_with_action))
        found_warnings.append(
            "Following stonith devices have the 'action' option set, "
            "it is recommended to set {0} instead: {1}".format(
                replacements, affected_devices
            )
        )
    if ids_with_method_cycle:
        affected_devices = ", ".join(sorted(ids_with_method_cycle))
        found_warnings.append(
            "Following stonith devices have the 'method' option set "
            "to 'cycle' which is potentially dangerous, please consider using "
            "'onoff': {0}".format(affected_devices)
        )
    return found_warnings
示例#8
0
 def test_systemctl(self, mock_is_systemd):
     # With the mocked check returning True, the plain "sbd" service name is
     # expected and the check must have received the service manager.
     mock_is_systemd.return_value = True
     service_name = lib_sbd.get_sbd_service_name(self.service_manager)
     self.assertEqual("sbd", service_name)
     mock_is_systemd.assert_called_once_with(self.service_manager)
示例#9
0
文件: cluster.py 项目: junaruga/pcs
def cluster_destroy(argv):
    """
    Destroy the cluster configuration, locally or on all nodes

    argv -- command line arguments; none are accepted, CmdLineInputError is
        raised when any is given

    With "--all" in utils.pcs_options, nodes found in the CIB by
    get_existing_nodes_names are deconfigured first (skipped with a warning
    when the CIB cannot be loaded) and then destroy_cluster is called for the
    nodes named by the corosync.conf facade. Otherwise the local cluster
    services are stopped, killed and disabled and the cluster configuration
    and pacemaker state files are removed.
    """
    if argv:
        raise CmdLineInputError()
    if "--all" in utils.pcs_options:
        # destroy remote and guest nodes
        cib = None
        lib_env = utils.get_lib_env()
        try:
            cib = lib_env.get_cib()
        except LibraryError:
            warn("Unable to load CIB to get guest and remote nodes from it, "
                 "those nodes will not be deconfigured.")
        if cib is not None:
            try:
                all_remote_nodes = get_existing_nodes_names(cib=cib)
                if all_remote_nodes:
                    _destroy_pcmk_remote_env(lib_env,
                                             all_remote_nodes,
                                             skip_offline_nodes=True,
                                             allow_fails=True)
            except LibraryError as e:
                utils.process_library_reports(e.args)

        # destroy full-stack nodes
        destroy_cluster(utils.get_corosync_conf_facade().get_nodes_names())
    else:
        print("Shutting down pacemaker/corosync services...")
        for service in ["pacemaker", "corosync-qdevice", "corosync"]:
            # Returns an error if a service is not running. It is safe to
            # ignore it since we want it not to be running anyways.
            utils.stop_service(service)
        print("Killing any remaining services...")
        kill_local_cluster_services()
        # The cleanup steps below are best effort. Only Exception is caught so
        # that KeyboardInterrupt and SystemExit are not silently swallowed.
        try:
            utils.disableServices()
        except Exception:
            # previously errors were suppressed in here, let's keep it that way
            # for now
            pass
        try:
            disable_service(utils.cmd_runner(), lib_sbd.get_sbd_service_name())
        except Exception:
            # it's not a big deal if sbd disable fails
            pass

        print("Removing all cluster configuration files...")
        # output and return value of the removal commands are ignored
        utils.run([
            "rm",
            "-f",
            settings.corosync_conf_file,
            settings.corosync_authkey_file,
            settings.pacemaker_authkey_file,
        ])
        state_files = [
            "cib.xml*", "cib-*", "core.*", "hostcache", "cts.*", "pe*.bz2",
            "cib.*"
        ]
        for name in state_files:
            utils.run([
                "find", "/var/lib/pacemaker", "-name", name, "-exec", "rm",
                "-f", "{}", ";"
            ])
        try:
            qdevice_net.client_destroy()
        except Exception:
            # errors from deleting other files are suppressed as well
            # we do not want to fail if qdevice was not set up
            pass