Пример #1
0
def install_packages(repos, packages):
    """
    Install or update packages, pinning any exact-version dependencies they declare.

    Every "<name> = <version>" requirement reported by ``yum_util("requires", ...)`` is added
    to the install set as "<name>-<version>" so that it is satisfied even if that involves
    installing an older package than is already installed.
    Primary use case is installing lustre-modules, which depends on a specific kernel package.

    The caller's ``packages`` list is not modified.

    :param repos: List of strings, yum repo names
    :param packages: List of strings, yum package names; an empty (or None) value skips the
                     yum calls entirely
    :return: agent_error(...) when _check_HYD4050() reports a problem, otherwise agent_result_ok
    """
    if packages:
        # Work on a copy so the pinned dependencies do not leak into the caller's list.
        to_install = list(packages)

        yum_util("clean")

        out = yum_util("requires", enablerepo=repos, packages=packages)
        for requirement in out.strip().split("\n"):
            # Raw string: "\)" is not a valid escape sequence in a plain string literal.
            match = re.match(r"([^\)/]*) = (.*)", requirement.strip())
            if match:
                to_install.append("%s-%s" % match.groups())

        yum_util("install", enablerepo=repos, packages=to_install)

        error = _check_HYD4050()

        if error:
            return agent_error(error)

    # Runs whether or not anything was installed.
    ServiceControl.create("iml-update-check").start(0)

    return agent_result_ok
Пример #2
0
    def _setup_ntp(self, server):
        """
        Write the ntp configuration for ``server`` and switch time keeping over to ntpd.

        When ``server`` is empty the server already present in the configuration is reused;
        if there is none, the user is prompted (default "localhost").

        The legacy line marker is passed to get_configured_server so that NTPConfig recognises
        entries written by previous IML manager NTP configuration routines.

        :param server: ntp server to configure, or a false value to reuse / prompt
        :raises RuntimeError: if writing the config, adding the firewall rule or restarting
                              ntpd reports an error
        """
        ntp_config = NTPConfig(logger=log)
        configured = ntp_config.get_configured_server(
            markers=["# Added by chroma-manager\n"])

        if not server and configured:
            server = configured
            log.info("Using existing (chroma configured) ntp server: %s" % configured)
        elif not server:
            # Only if you haven't already set it
            server = self.get_input(msg="NTP Server", default="localhost")

        log.info("Writing ntp configuration: %s " % server)

        write_error = ntp_config.add(server)
        if write_error:
            log.error(
                "Failed to write ntp server (%s) to config file (%s), %s" %
                (server, ntp_config.CONFIG_FILE, write_error))
            raise RuntimeError("Failure when writing ntp config: %s" % write_error)

        # The firewall rule is only added while firewalld is running.
        if ServiceControl.create("firewalld").running:
            rule_error = firewall_control.add_rule("123", "udp", "ntpd")
            if rule_error:
                log.error("firewall command failed:\n%s" % rule_error)
                raise RuntimeError(
                    "Failure when opening port in firewall for ntpd: %s" % rule_error)

        log.info("Disabling chrony if active")
        chronyd = ServiceControl.create("chronyd")
        # The results of stop() and disable() are not checked here.
        chronyd.stop(validate_time=0.5)
        chronyd.disable()

        log.info("Restarting ntp")
        ntpd = ServiceControl.create("ntpd")
        ntpd.enable()
        restart_error = ntpd.restart()
        if restart_error:
            log.error(restart_error)
            raise RuntimeError(restart_error)
Пример #3
0
 def _restart_pgsql(self):
     """Reload postgresql-9.6 if it is running, otherwise start it; then enable it."""
     pgsql = ServiceControl.create("postgresql-9.6")
     bring_up = pgsql.reload if pgsql.running else pgsql.start
     bring_up()
     pgsql.enable()
Пример #4
0
    def disable_and_kill():
        """Log that we are terminating, then disable and stop iml-storage-server.target."""
        console_log.info("Terminating")

        target = ServiceControl.create("iml-storage-server.target")
        # Disable first, then stop.
        target.disable()
        target.stop()
Пример #5
0
def start_monitored_copytool(id):
    """
    Set up and (re)start the copytool monitor service followed by the copytool service.

    The monitor is started first so that we have a reader on the FIFO when
    the copytool begins emitting events.

    :param id: copytool identifier; passed to _copytool_vars and used in the service names
    :return: agent_error(...) for the first service that fails to (re)start, otherwise
             agent_result_ok
    """
    settings = _copytool_vars(id)

    for base_name in ("chroma-copytool-monitor", "chroma-copytool"):
        _write_service_init(
            base_name, settings["id"], settings["ct_path"], settings["ct_arguments"]
        )

        unit = ServiceControl.create("%s-%s" % (base_name, id))
        unit.daemon_reload()

        # A running service is restarted; a stopped one is started.
        failure = unit.restart() if unit.running else unit.start()
        if failure:
            return agent_error(failure)

    return agent_result_ok
Пример #6
0
    def _stop_services(self):
        """
        Stop iml-manager.target.

        :raises RuntimeError: if the stop call reports an error
        """
        log.info("Stopping iml manager")

        failure = ServiceControl.create("iml-manager.target").stop(validate_time=0.5)
        if not failure:
            return

        log.error(failure)
        raise RuntimeError(failure)
Пример #7
0
 def _configure_firewall(self):
     """Add TCP ports 80 and 443 through firewall-cmd, but only while firewalld is running."""
     if not ServiceControl.create("firewalld").running:
         return

     for port in (80, 443):
         rule = "--add-port={}/tcp".format(port)
         # First with --permanent, then the same rule without it.
         self.try_shell(["firewall-cmd", "--permanent", rule])
         self.try_shell(["firewall-cmd", rule])
Пример #8
0
 def _configure_firewall(self):
     """Add TCP ports 80 and 443 through firewall-cmd, but only while firewalld is running."""
     if not ServiceControl.create('firewalld').running:
         return

     # Each port is added twice: once with --permanent and once without.
     command_prefixes = (['firewall-cmd', '--permanent'], ['firewall-cmd'])
     for port in (80, 443):
         for prefix in command_prefixes:
             self.try_shell(prefix + ['--add-port={}/tcp'.format(port)])
Пример #9
0
    def _stop_services(self):
        """
        Stop every service listed in CONTROLLED_SERVICES, in order.

        :raises RuntimeError: on the first service whose stop call reports an error
        """
        log.info("Stopping daemons")
        for name in self.CONTROLLED_SERVICES:
            failure = ServiceControl.create(name).stop()
            if failure:
                log.error(failure)
                raise RuntimeError(failure)
Пример #10
0
    def _enable_services(self):
        """
        Enable every service listed in CONTROLLED_SERVICES, in order.

        :raises RuntimeError: on the first service whose enable call reports an error
        """
        log.info("Enabling daemons")
        for name in self.CONTROLLED_SERVICES:
            failure = ServiceControl.create(name).enable()
            if failure:
                log.error(failure)
                raise RuntimeError(failure)
Пример #11
0
    def setUp(self):
        """Patch util.platform_info to CentOS 7.3 and check ServiceControl.create gives the EL7 class."""
        super(TestServiceStateEL7, self).setUp()

        el7_platform = util.PlatformInfo('Linux', 'CentOS', 0.0, '7.3', 0.0, 0, '')
        platform_patch = mock.patch.object(util, 'platform_info', el7_platform)
        # NOTE(review): the patch is started here but not stopped in this method -
        # confirm it is stopped elsewhere (e.g. in tearDown).
        platform_patch.start()

        self.test = ServiceControl.create('test_service')
        self.assertEqual(type(self.test), ServiceControlEL7)
Пример #12
0
    def setUp(self):
        """Patch util.platform_info to CentOS 7.3 and check ServiceControl.create gives the EL7 class."""
        super(TestServiceStateEL7, self).setUp()

        el7_platform = util.PlatformInfo("Linux", "CentOS", 0.0, "7.3", 0.0, 0, "")
        platform_patch = mock.patch.object(util, "platform_info", el7_platform)
        # NOTE(review): the patch is started here but not stopped in this method -
        # confirm it is stopped elsewhere (e.g. in tearDown).
        platform_patch.start()

        self.test = ServiceControl.create("test_service")
        self.assertEqual(type(self.test), ServiceControlEL7)
    def _service_config(interesting_services=None):
        """Interrogate the current status of services.

        It should be noted that el7 calls to systemctl through ServiceControl will redirect to
        chkconfig if the specified service is not native (SysV style init as opposed to systemd
        unit init file)

        :param interesting_services: iterable of service names to query; None (the default)
                                     is treated as "no services"
        :return: dict mapping each service name to {"enabled": ..., "running": ...}
        """
        log.info("Checking service configuration...")

        services = {}
        # The default of None was previously iterated directly, which raised TypeError.
        for service_name in interesting_services or ():
            controller = ServiceControl.create(service_name)
            services[service_name] = {"enabled": controller.enabled, "running": controller.running}

        return services
Пример #14
0
 def _setup_grafana(self):
     """
     Enable grafana-server and make sure it is freshly started.

     :raises RuntimeError: if enabling or starting the service reports an error
     """

     def fail_on(problem):
         # Log and raise when a service call reported an error; otherwise do nothing.
         if problem:
             log.error(problem)
             raise RuntimeError(problem)

     # grafana needs daemon-reload before enable and start
     ServiceControlEL7.daemon_reload()
     grafana = ServiceControl.create("grafana-server")

     fail_on(grafana.enable())
     if grafana.running:
         # The result of stop() is not checked.
         grafana.stop()
     fail_on(grafana.start())
Пример #15
0
    def _setup_pgsql(self, database, check_db_space):
        """
        Bring up the postgresql service and make sure the configured role and database exist.

        :param database: settings mapping; the "USER" and "NAME" keys are read here
        :param check_db_space: when true, an error from the disk space check is returned
        :return: the error from _check_db_space (only when check_db_space is set), else None
        :raises RuntimeError: if psql still fails after the retries below
        """
        log.info("Setting up PostgreSQL service...")

        self._init_pgsql(database)

        pgsql = ServiceControl.create("postgresql")
        pgsql.restart()
        pgsql.enable()

        # Probe with psql until it succeeds: at most five probes, one second apart.
        attempts = 0
        while True:
            rc = self.shell(["su", "postgres", "-c", "psql -c '\\d'"])[0]
            if rc == 0:
                break
            if attempts >= 4:
                raise RuntimeError(
                    "Timed out waiting for PostgreSQL service to start")
            attempts += 1
            time.sleep(1)

        # The space check always runs; its result only matters when check_db_space is set.
        space_error = self._check_db_space(self.REQUIRED_DB_SPACE_GB)
        if space_error and check_db_space:
            return space_error

        if self._db_accessible():
            return None

        owner = database["USER"]
        log.info("Creating database owner '%s'...\n" % owner)

        # Enumerate existing roles
        _rc, roles_output, _err = self.try_shell([
            "su", "postgres", "-c", "psql -t -c 'select rolname from pg_roles;'"
        ])
        stripped_lines = (line.strip() for line in roles_output.split("\n"))
        existing_roles = [role for role in stripped_lines if role]

        # Create database['USER'] role if not found
        if owner not in existing_roles:
            create_role = ("psql -c 'CREATE ROLE %s NOSUPERUSER CREATEDB NOCREATEROLE "
                           "INHERIT LOGIN;'" % owner)
            self.try_shell(["su", "postgres", "-c", create_role])

        db_name = database["NAME"]
        log.info("Creating database '%s'...\n" % db_name)
        self.try_shell(
            ["su", "postgres", "-c", "createdb -O %s %s;" % (owner, db_name)])
        return None
Пример #16
0
    def _start_services(self):
        """
        Start, or reload if already running, every service in CONTROLLED_SERVICES.

        :raises RuntimeError: on the first service whose start/reload reports an error
        """
        log.info("Starting daemons")
        for name in self.CONTROLLED_SERVICES:
            unit = ServiceControl.create(name)

            if not unit.running:
                failure = unit.start()
            elif name.endswith(".target"):
                # A running ".target" is left as it is rather than reloaded.
                failure = False
            else:
                failure = unit.reload()

            if failure:
                log.error(failure)
                raise RuntimeError(failure)
Пример #17
0
def stop_monitored_copytool(id):
    """
    Stop the copytool monitor and copytool services and remove their init files.

    Original intent, per the author: stop the monitor after the copytool so that
    we can relay the unconfigure event.
    NOTE(review): the loop below visits the monitor *before* the copytool, which
    appears to contradict that intent - confirm the intended order.

    :param id: copytool identifier used in the service and init file names
    :return: agent_error(...) for the first service that fails to stop, otherwise
             agent_result_ok
    """
    for base_name in ('chroma-copytool-monitor', 'chroma-copytool'):
        unit = ServiceControl.create('%s-%s' % (base_name, id))

        # Only a service that has an init file AND is running is stopped and cleaned up.
        if os.path.exists(_init_file_name(base_name, id)) and unit.running:
            failure = unit.stop()
            if failure:
                return agent_error(failure)

            os.remove(_init_file_name(base_name, id))

        # Finally cause the system agents to see our changes.
        unit.daemon_reload()

    return agent_result_ok
Пример #18
0
    def validate(self):
        """
        Collect human-readable problems with the database and iml-manager.target.

        :return: list of error strings; empty when nothing was found wrong
        """
        errors = []

        # The checks run in order and only the first failing one is reported.
        db_checks = (
            (self._db_accessible, "Cannot connect to database"),
            (self._db_current, "Database tables out of date"),
            (self._users_exist, "No user accounts exist"),
        )
        for check, message in db_checks:
            if not check():
                errors.append(message)
                break

        target = ServiceControl.create("iml-manager.target")

        try:
            enabled = target.enabled
            if not enabled:
                errors.append("iml-manager.target not set to start at boot")
            running = target.running
            if not running:
                errors.append("iml-manager.target is not running")
        except KeyError:
            # A KeyError from either property is reported as the target being missing.
            errors.append("iml-manager.target not found")

        return errors
Пример #19
0
    def _start_services(self):
        """
        Start every service in CONTROLLED_SERVICES, then wait for supervisor to settle.

        :raises RuntimeError: if a start call reports an error, or if SupervisorStatus still
                              lists non-running services after the timeout
        """
        log.info("Starting daemons")
        for name in self.CONTROLLED_SERVICES:
            failure = ServiceControl.create(name).start()
            if failure:
                log.error(failure)
                raise RuntimeError(failure)

        SUPERVISOR_START_TIMEOUT = 10
        waited = 0
        # Poll once per second until nothing is reported as non-running.
        while SupervisorStatus().get_non_running_services():
            time.sleep(1)
            waited += 1
            if waited > SUPERVISOR_START_TIMEOUT:
                still_down = SupervisorStatus().get_non_running_services()
                msg = "Some services failed to start: %s" % ", ".join(still_down)
                log.error(msg)
                raise RuntimeError(msg)
Пример #20
0
    def _stop_services(self):
        """
        Stop every service in CONTROLLED_SERVICES, then wait for supervisord to go away.

        :raises RuntimeError: if a stop call reports an error, or if supervisor is still
                              answering after SUPERVISOR_STOP_TIMEOUT polls
        """
        log.info("Stopping daemons")
        for service in self.CONTROLLED_SERVICES:
            controller = ServiceControl.create(service)

            error = controller.stop()
            if error:
                log.error(error)
                raise RuntimeError(error)

        # Wait for supervisord to stop running
        SUPERVISOR_STOP_TIMEOUT = 20
        t = 0
        stopped = False
        while True:
            try:
                SupervisorStatus().get_all_process_info()
            except socket.error:
                # No longer up
                stopped = True
            except xmlrpclib.Fault as e:
                # "as" form: valid on Python 2.6+ and 3, unlike the old "Fault, e" spelling.
                if (e.faultCode, e.faultString) == (6, 'SHUTDOWN_STATE'):
                    # Up but shutting down
                    pass
                else:
                    raise

            if stopped:
                break
            else:
                if t > SUPERVISOR_STOP_TIMEOUT:
                    raise RuntimeError(
                        "chroma-supervisor failed to stop after %s seconds" %
                        SUPERVISOR_STOP_TIMEOUT)
                else:
                    # Poll again in one second.
                    t += 1
                    time.sleep(1)
Пример #21
0
# Copyright (c) 2018 DDN. All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.

from iml_common.lib.ntp import NTPConfig
from iml_common.lib.agent_rpc import agent_ok_or_error
from iml_common.lib.service_control import ServiceControl

# Module-level service handles for ntpd and chronyd, created when this module is imported.
ntp_service = ServiceControl.create("ntpd")
chrony_service = ServiceControl.create("chronyd")


def unconfigure_ntp():
    """
    Unconfigure the ntp client by calling configure_ntp with no server.

    :return: Value using simple return protocol (whatever configure_ntp(None) returns)
    """
    outcome = configure_ntp(None)
    return outcome


def configure_ntp(ntp_server):
    """
    Change the ntp configuration file to use the server passed

    :return: Value using simple return protocol
    """
    error = NTPConfig().add(ntp_server)
    if error:
        return error
    else:
Пример #22
0
"""

import os

from chroma_agent import config
from chroma_agent import conf
from chroma_agent.agent_client import AgentClient, HttpError
from chroma_agent.agent_daemon import ServerProperties
from chroma_agent.crypto import Crypto
from chroma_agent.device_plugins.action_runner import CallbackAfterResponse
from chroma_agent.log import console_log
from chroma_agent.plugin_manager import ActionPluginManager, DevicePluginManager
from iml_common.lib.service_control import ServiceControl
from iml_common.lib.agent_rpc import agent_ok_or_error

# Module-level handle for the chroma-agent service, created when this module is imported.
agent_service = ServiceControl.create("chroma-agent")


def _service_is_running():
    """Return the ``running`` attribute of the chroma-agent service handle (True if running)."""
    is_running = agent_service.running
    return is_running


def deregister_server():
    conf.remove_server_url()

    def disable_and_kill():
        console_log.info("Terminating")

        storage_server_target = ServiceControl.create(
            "iml-storage-server.target")
Пример #23
0
# Copyright (c) 2018 DDN. All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.

from iml_common.lib.ntp import NTPConfig
from iml_common.lib.agent_rpc import agent_ok_or_error
from iml_common.lib.service_control import ServiceControl

# Module-level handle for the ntpd service, created when this module is imported.
ntp_service = ServiceControl.create("ntpd")


def unconfigure_ntp():
    """
    Unconfigure the ntp client by calling configure_ntp with no server.

    :return: Value using simple return protocol (whatever configure_ntp(None) returns)
    """
    outcome = configure_ntp(None)
    return outcome


def configure_ntp(ntp_server):
    """
    Change the ntp configuration file to use the server passed, then restart ntpd.

    :param ntp_server: server to configure; unconfigure_ntp passes None
    :return: the error from NTPConfig().add() when it reports one, otherwise
             agent_ok_or_error() applied to the result of restarting ntpd
    """
    add_error = NTPConfig().add(ntp_server)
    if add_error:
        # ntpd is not restarted when add() reported an error.
        return add_error

    return agent_ok_or_error(ntp_service.restart())
Пример #24
0
# Copyright (c) 2018 DDN. All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.

from iml_common.lib.ntp import NTPConfig
from iml_common.lib.agent_rpc import agent_ok_or_error
from iml_common.lib.service_control import ServiceControl

# Module-level handle for the ntpd service, created when this module is imported.
ntp_service = ServiceControl.create('ntpd')


def unconfigure_ntp():
    """
    Unconfigure the ntp client by calling configure_ntp with no server.

    :return: Value using simple return protocol (whatever configure_ntp(None) returns)
    """
    outcome = configure_ntp(None)
    return outcome


def configure_ntp(ntp_server):
    """
    Change the ntp configuration file to use the server passed, then restart ntpd.

    :param ntp_server: server to configure; unconfigure_ntp passes None
    :return: the error from NTPConfig().add() when it reports one, otherwise
             agent_ok_or_error() applied to the result of restarting ntpd
    """
    add_error = NTPConfig().add(ntp_server)
    if add_error:
        # ntpd is not restarted when add() reported an error.
        return add_error

    return agent_ok_or_error(ntp_service.restart())
Пример #25
0
# Copyright (c) 2017 Intel Corporation. All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.

import os

from chroma_agent.device_plugins.syslog import SYSLOG_PORT
from iml_common.lib.agent_rpc import agent_ok_or_error
from iml_common.lib.service_control import ServiceControl

# Module-level handle for the rsyslog service, created when this module is imported.
rsyslog_service = ServiceControl.create('rsyslog')


def unconfigure_rsyslog():
    """
    Modify the rsyslogd configuration to stop forwarding messages to chroma

    Done by calling _configure_rsyslog with an empty destination.

    :return: whatever _configure_rsyslog("") returns
    """
    no_destination = ""
    return _configure_rsyslog(no_destination)


def configure_rsyslog():
    """
    Modify the rsyslogd configuration to forward all messages to chroma

    Done by calling _configure_rsyslog with the loopback address as destination.

    :return: whatever _configure_rsyslog("127.0.0.1") returns
    """
    loopback = "127.0.0.1"
    return _configure_rsyslog(loopback)

Пример #26
0
Corosync verification
"""

from chroma_agent.log import console_log
from chroma_agent.lib.shell import AgentShell
from chroma_agent.lib.corosync import CorosyncRingInterface
from chroma_agent.action_plugins.manage_corosync_common import InterfaceInfo

from iml_common.lib.service_control import ServiceControl
from iml_common.lib.firewall_control import FirewallControl
from iml_common.lib.agent_rpc import agent_error
from iml_common.lib.agent_rpc import agent_ok_or_error

# NOTE(review): presumably the TCP port pcsd listens on - confirm against its users.
PCS_TCP_PORT = 2224

# Module-level service / firewall handles, created when this module is imported.
corosync_service = ServiceControl.create("corosync")
pcsd_service = ServiceControl.create("pcsd")
firewall_control = FirewallControl.create()

# NOTE(review): the PCS_USER value looks redacted ("******") - confirm the real user name.
PCS_USER = "******"
PCS_CLUSTER_NAME = "lustre-ha-cluster"
COROSYNC_CONF_PATH = "/etc/corosync/corosync.conf"


def start_corosync2():
    """
    Enable the corosync service and, if enabling reported no error, start it.

    :return: agent_ok_or_error() applied to the enable error, or else to the start result
    """
    outcome = corosync_service.enable()
    if not outcome:
        # Enabling reported nothing, so the start result decides the outcome.
        outcome = corosync_service.start()
    return agent_ok_or_error(outcome)


def stop_corosync2():
    """Stop the corosync service and pass the result through agent_ok_or_error."""
    stop_result = corosync_service.stop()
    return agent_ok_or_error(stop_result)
Пример #27
0
def corosync_running():
    """Return the ``running`` attribute of a freshly created corosync service handle."""
    corosync = ServiceControl.create('corosync')
    return corosync.running
Пример #28
0
it from manager)
"""

import os

from chroma_agent import config
from chroma_agent.agent_client import AgentClient, HttpError
from chroma_agent.agent_daemon import ServerProperties
from chroma_agent.crypto import Crypto
from chroma_agent.device_plugins.action_runner import CallbackAfterResponse
from chroma_agent.log import console_log
from chroma_agent.plugin_manager import ActionPluginManager, DevicePluginManager
from iml_common.lib.service_control import ServiceControl
from iml_common.lib.agent_rpc import agent_ok_or_error

# Module-level handle for the chroma-agent service, created when this module is imported.
agent_service = ServiceControl.create('chroma-agent')


def _service_is_running():
    """Return the ``running`` attribute of the chroma-agent service handle (True if running)."""
    is_running = agent_service.running
    return is_running


def _start_service():
    """Start the chroma-agent service and pass the result through agent_ok_or_error."""
    start_result = agent_service.start()
    return agent_ok_or_error(start_result)


def _stop_service():
    """Stop the chroma-agent service and pass the result through agent_ok_or_error."""
    stop_result = agent_service.stop()
    return agent_ok_or_error(stop_result)

Пример #29
0
from chroma_agent.log import daemon_log
from manage_corosync import start_corosync, stop_corosync
from chroma_agent.lib.pacemaker import pacemaker_running
from chroma_agent.lib.corosync import corosync_running
from iml_common.lib.service_control import ServiceControl
from iml_common.lib.agent_rpc import agent_error, agent_result_ok, agent_ok_or_error

# The window of time in which we count resource monitor failures
RSRC_FAIL_WINDOW = "20m"
# The number of times in the above window a resource monitor can fail
# before we migrate it
RSRC_FAIL_MIGRATION_COUNT = "3"

# NOTE(review): units are not visible here - presumably seconds; confirm at the call site.
PACEMAKER_CONFIGURE_TIMEOUT = 120

# Module-level service handles, created when this module is imported.
pacemaker_service = ServiceControl.create('pacemaker')
corosync_service = ServiceControl.create('corosync')


def _get_cluster_size():
    # you'd think there'd be a way to query the value of a property
    # such as "expected-quorum-votes" but there does not seem to be, so
    # just count nodes instead
    rc, stdout, stderr = AgentShell.run_old(["crm_node", "-l"])

    if not stdout:
        return 0

    n = 0
    for line in stdout.rstrip().split('\n'):
        node_id, name, status = line.split(" ")
Пример #30
0
from os import remove
from collections import namedtuple
import errno
import re

from iml_common.lib.service_control import ServiceControl
from iml_common.lib.firewall_control import FirewallControl

from chroma_agent.lib.corosync import (
    CorosyncRingInterface,
    render_config,
    write_config_to_file,
)
from iml_common.lib.agent_rpc import agent_error, agent_result_ok, agent_ok_or_error

# Module-level service / firewall handles, created when this module is imported.
corosync_service = ServiceControl.create("corosync")
firewall_control = FirewallControl.create()


def start_corosync():
    """Start the corosync service and pass the result through agent_ok_or_error."""
    start_result = corosync_service.start()
    return agent_ok_or_error(start_result)


def stop_corosync():
    """Stop the corosync service and pass the result through agent_ok_or_error."""
    stop_result = corosync_service.stop()
    return agent_ok_or_error(stop_result)


def restart_corosync():
    """Restart the corosync service and pass the result through agent_ok_or_error."""
    restart_result = corosync_service.restart()
    return agent_ok_or_error(restart_result)