Example #1
    def install_plugins():
        """
        (Re)load plugins
        """
        if ServiceManager.has_service('ovs-watcher-framework',
                                      SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = []
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    clients.append(SSHClient(storagerouter, username='******'))
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')

            for client in clients:
                for service_name in ['watcher-framework', 'memcached']:
                    ServiceManager.stop_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(
                                service_name, client=client) is False:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError(
                            'Could not stop service: {0}'.format(service_name))

            for client in clients:
                for service_name in ['memcached', 'watcher-framework']:
                    ServiceManager.start_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(
                                service_name, client=client) is True:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError(
                            'Could not start service: {0}'.format(
                                service_name))

            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            for function in functions:
                function(ip=ip)
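Note that the stop-and-poll and start-and-poll loops above are near-duplicates. A helper along the following lines could factor out the polling; this is a minimal sketch assuming the boolean-returning ServiceManager API used in this example, and the name _await_status is an invention for illustration, not part of the original code.

    import time

    def _await_status(service_name, client, expected, timeout=30):
        # Poll once per second until the service reports the expected running
        # state (True or False) or the timeout (in seconds) expires.
        wait = timeout
        while wait > 0:
            if ServiceManager.get_service_status(service_name, client=client) is expected:
                return
            time.sleep(1)
            wait -= 1
        raise RuntimeError('Service {0} did not reach expected state within {1}s'.format(service_name, timeout))

With it, each inner loop in the example reduces to a stop_service or start_service call followed by _await_status(service_name, client, False) or _await_status(service_name, client, True).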
Example #2
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
Example #3
    def change_service_state(client, name, state, logger=None):
        """
        Starts/stops/restarts a service
        :param client: SSHClient on which to connect and change service state
        :param name: Name of the service
        :param state: State to put the service in
        :param logger: LogHandler Object
        """
        action = None
        status, _ = ServiceManager.get_service_status(name, client=client)
        if status is False and state in ['start', 'restart']:
            if logger is not None:
                logger.debug('  {0:<15} - Starting service {1}'.format(client.ip, name))
            ServiceManager.start_service(name, client=client)
            action = 'started'
        elif status is True and state == 'stop':
            if logger is not None:
                logger.debug('  {0:<15} - Stopping service {1}'.format(client.ip, name))
            ServiceManager.stop_service(name, client=client)
            action = 'stopped'
        elif status is True and state == 'restart':
            if logger is not None:
                logger.debug('  {0:<15} - Restarting service {1}'.format(client.ip, name))
            ServiceManager.restart_service(name, client=client)
            action = 'restarted'

        if action is None:
            print '  [{0}] {1} already {2}'.format(client.ip, name, 'running' if status is True else 'halted')
        else:
            if logger is not None:
                logger.debug('  {0:<15} - Service {1} {2}'.format(client.ip, name, action))
            print '  [{0}] {1} {2}'.format(client.ip, name, action)
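A call site might look like the following; the IP address, service name, and logger name are hypothetical, and the snippet assumes the method is exposed on the Toolbox helper class imported in Example #1 (the owning class is not shown in the snippet above).

    client = SSHClient('10.100.1.1', username='root')
    logger = LogHandler.get('lib', name='service-state')  # optional; pass None to fall back to print output only
    Toolbox.change_service_state(client=client, name='watcher-framework', state='restart', logger=logger)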
Example #4
    def _setup_proxy(initial_cluster, slave_client, cluster_name, force=False):
        base_name = 'ovs-etcd-proxy'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        if force is False and ServiceManager.has_service(target_name, slave_client) and \
            ServiceManager.get_service_status(target_name, slave_client) is True:
            logger.info('Service {0} already configured and running'.format(target_name))
            return
        EtcdInstaller.stop(cluster_name, slave_client)

        data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        abs_paths = [data_dir, wal_dir]
        slave_client.dir_delete(abs_paths)
        slave_client.dir_create(data_dir)
        slave_client.dir_chmod(data_dir, 0755, recursive=True)
        slave_client.dir_chown(data_dir, 'ovs', 'ovs', recursive=True)

        ServiceManager.add_service(base_name, slave_client,
                                   params={'CLUSTER': cluster_name,
                                           'DATA_DIR': data_dir,
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                           'INITIAL_CLUSTER': initial_cluster},
                                   target_name=target_name)
        EtcdInstaller.start(cluster_name, slave_client)
        EtcdInstaller.wait_for_cluster(cluster_name, slave_client)
Example #5
    def install_plugins():
        """
        (Re)load plugins
        """
        if ServiceManager.has_service('ovs-watcher-framework', SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = []
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    clients.append(SSHClient(storagerouter, username='******'))
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')

            for client in clients:
                for service_name in ['watcher-framework', 'memcached']:
                    ServiceManager.stop_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(service_name, client=client) is False:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError('Could not stop service: {0}'.format(service_name))

            for client in clients:
                for service_name in ['memcached', 'watcher-framework']:
                    ServiceManager.start_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(service_name, client=client) is True:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError('Could not start service: {0}'.format(service_name))

            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            for function in functions:
                function(ip=ip)
Example #6
 def start(cluster_name, client):
     """
     Starts an arakoon cluster
     """
     if ServiceManager.get_service_status(
             'arakoon-{0}'.format(cluster_name), client=client) is False:
         ServiceManager.start_service('arakoon-{0}'.format(cluster_name),
                                      client=client)
Example #7
 def get_service_status(name, client):
     """
     Check the status of the service
     :param name: Name of the service
     :param client: SSHClient object
     :return: True if service is running
     """
     return ServiceManager.get_service_status(name, client)
Example #8
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.stop_service('arakoon-{0}'.format(cluster_name),
                                     client=client)
Example #9
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     :param client: Client on which to stop the service
     :param cluster_name: The name of the cluster service to stop
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
Example #10
 def is_running(cluster_name, client):
     """
     Checks if arakoon service is running
     :param client: Client on which to check the service
     :param cluster_name: The name of the cluster service to check
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client):
         return ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client)
     return False
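The has_service guard matters: asking a service manager for the status of a service that was never installed may raise or report misleading output, so callers check existence first and treat a missing service as not running. For unit tests, a tiny in-memory stand-in can mimic this contract (entirely hypothetical, for illustration only; the real ServiceManager uses class-level methods and an SSHClient):

    class FakeServiceManager(object):
        # Minimal test double mirroring the has/get/start/stop contract.
        def __init__(self):
            self._running = {}  # service name -> bool

        def has_service(self, name, client=None):
            return name in self._running

        def get_service_status(self, name, client=None):
            return self._running[name]

        def start_service(self, name, client=None):
            self._running[name] = True

        def stop_service(self, name, client=None):
            self._running[name] = False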
Example #11
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     """
     if (
         ServiceManager.has_service("arakoon-{0}".format(cluster_name), client=client) is True
         and ServiceManager.get_service_status("arakoon-{0}".format(cluster_name), client=client) is True
     ):
         ServiceManager.stop_service("arakoon-{0}".format(cluster_name), client=client)
Example #12
 def start(cluster_name, client):
     """
     Starts an etcd cluster
     :param client: Client on which to start the service
     :param cluster_name: The name of the cluster service to start
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('etcd-{0}'.format(cluster_name), client=client) is False:
         ServiceManager.start_service('etcd-{0}'.format(cluster_name), client=client)
Example #13
 def manage_running_tasks(tasklist, timesleep=10):
     """
     Manage a list of running celery tasks
     - discard PENDING tasks after a certain timeout
     - validate STARTED tasks are actually running
     :param tasklist: Dictionary of tasks to wait for {IP address: AsyncResult}
     :type tasklist: dict
     :param timesleep: Sleep between checks - for long-running tasks it is better to sleep
       for a longer period of time to reduce the number of SSH calls
     :type timesleep: int
     :return: Task results and the list of failed nodes
     :rtype: tuple
     """
     logger = LogHandler.get('lib', name='celery toolbox')
     ssh_clients = {}
     tasks_pending = {}
     tasks_pending_timeout = 1800  # 30 minutes
     results = {}
     failed_nodes = []
     while len(tasklist.keys()) > 0:
         for ip, task in tasklist.items():
             if task.state in ('SUCCESS', 'FAILURE'):
                 logger.info('Task {0} finished: {1}'.format(task.id, task.state))
                 results[ip] = task.get(propagate=False)
                 del tasklist[ip]
             elif task.state == 'PENDING':
                 if task.id not in tasks_pending:
                     tasks_pending[task.id] = time.time()
                 else:
                     task_pending_since = tasks_pending[task.id]
                     if time.time() - task_pending_since > tasks_pending_timeout:
                         logger.warning('Task {0} is pending since {1} on node {2}. Task will be revoked'.format(task.id, datetime.datetime.fromtimestamp(task_pending_since), ip))
                         revoke(task.id)
                         del tasklist[ip]
                         del tasks_pending[task.id]
                         failed_nodes.append(ip)
             elif task.state == 'STARTED':
                 if ip not in ssh_clients:
                     ssh_clients[ip] = SSHClient(ip, username='******')
                 client = ssh_clients[ip]
                 if ServiceManager.get_service_status('workers', client) is False:
                     logger.error('Service ovs-workers on node {0} appears halted while there is a task STARTED for it {1}. Task will be revoked.'.format(ip, task.id))
                     revoke(task.id)
                     del tasklist[ip]
                     failed_nodes.append(ip)
                 else:
                     ping_result = task.app.control.inspect().ping()
                     storage_router = StorageRouterList.get_by_ip(ip)
                     if "celery@{0}".format(storage_router.name) not in ping_result:
                         logger.error('Service ovs-workers on node {0} is not reachable via rabbitmq while there is a task STARTED for it {1}. Task will be revoked.'.format(ip, task.id))
                         revoke(task.id)
                         del tasklist[ip]
                         failed_nodes.append(ip)
         if len(tasklist.keys()) > 0:
             time.sleep(timesleep)
     return results, failed_nodes
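For context, tasklist maps node IPs to Celery AsyncResult objects. A caller might assemble it like this (a sketch; some_task stands in for any Celery task and is not part of the original code, while StorageRouterList is the same list used elsewhere in these examples):

    tasklist = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        # Dispatch asynchronously; .delay() returns an AsyncResult
        tasklist[storagerouter.ip] = some_task.delay(storagerouter.guid)
    results, failed_nodes = manage_running_tasks(tasklist, timesleep=10)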
Example #15
 def start(cluster_name, client):
     """
     Starts an arakoon cluster
     :param client: Client on which to start the service
     :param cluster_name: The name of the cluster service to start
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is False:
         ServiceManager.start_service('arakoon-{0}'.format(cluster_name),
                                      client=client)
Example #16
 def wait_for_service(client, name, status, logger):
     """
     Wait for service to enter status
     :param client: SSHClient to run commands
     :param name: name of service
     :param status: True - running/False - not running
     :param logger: Logging object
     """
     tries = 10
     while tries > 0:
         service_status, _ = ServiceManager.get_service_status(name, client)
         if service_status == status:
             break
         logger.debug('... waiting for service {0}'.format(name))
         tries -= 1
         time.sleep(10 - tries)  # Increasing backoff: sleeps 1s, 2s, ... up to 10s between checks
     service_status, output = ServiceManager.get_service_status(name, client)
     if service_status != status:
         raise RuntimeError('Service {0} does not have expected status: {1}'.format(name, output))
Example #17
 def stop(cluster_name, client):
     """
     Stops an etcd service
     :param client: Client on which to stop the service
     :param cluster_name: The name of the cluster service to stop
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('etcd-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.stop_service('etcd-{0}'.format(cluster_name),
                                     client=client)
Example #18
 def is_running(cluster_name, client):
     """
     Checks if arakoon service is running
     :param cluster_name: The name of the cluster service to check
     :type cluster_name: str
     :param client: Client on which to check the service
     :type client: SSHClient
     :return: True if the arakoon service is running, False otherwise
     """
     service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
     if ServiceManager.has_service(name=service_name, client=client):
         return ServiceManager.get_service_status(name=service_name, client=client)[0]
     return False
Example #19
 def _is_cinder_running(self):
     if self.is_devstack:
         try:
             return 'cinder-volume' in str(self.client.run('ps aux | grep cinder-volume | grep -v grep'))
         except SystemExit:
             return False
     if self.is_openstack:
         try:
             cinder_service = OSManager.get_openstack_cinder_service_name()
             return ServiceManager.get_service_status(cinder_service, self.client)
         except SystemExit:
             return False
     return False
Example #20
 def wait_for_service(client, name, status, logger):
     """
     Wait for service to enter status
     :param client: SSHClient to run commands
     :param name: name of service
     :param status: True - running/False - not running
     :param logger: Logging object
     """
     tries = 10
     while tries > 0:
         service_status, _ = ServiceManager.get_service_status(name, client)
         if service_status == status:
             break
         logger.debug('... waiting for service {0}'.format(name))
         tries -= 1
         time.sleep(10 - tries)
     service_status, output = ServiceManager.get_service_status(
         name, client)
     if service_status != status:
         raise RuntimeError(
             'Service {0} does not have expected status: {1}'.format(
                 name, output))
Example #21
    def create_cluster(cluster_name, ip, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
        """
        Creates a cluster
        :param cluster_name: Name of the cluster
        :type cluster_name: str

        :param ip: IP address of the first node of the new cluster
        :type ip: str

        :param server_port: Port to be used by server
        :type server_port: int

        :param client_port: Port to be used by client
        :type client_port: int

        :return: None
        """
        EtcdInstaller._logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))

        client = SSHClient(ip, username='******')
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        if ServiceManager.has_service(target_name, client) and ServiceManager.get_service_status(target_name, client) is True:
            EtcdInstaller._logger.info('Service {0} already configured and running'.format(target_name))
            return

        node_name = System.get_my_machine_id(client)
        data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
        abs_paths = [data_dir, wal_dir]
        client.dir_delete(abs_paths)
        client.dir_create(abs_paths)
        client.dir_chmod(abs_paths, 0755, recursive=True)
        client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        base_name = 'ovs-etcd'
        ServiceManager.add_service(base_name, client,
                                   params={'CLUSTER': cluster_name,
                                           'NODE_ID': node_name,
                                           'DATA_DIR': data_dir,
                                           'WAL_DIR': wal_dir,
                                           'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip, server_port),
                                           'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip, client_port),
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                           'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip, server_port)),
                                           'INITIAL_STATE': 'new',
                                           'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip, server_port))},
                                   target_name=target_name)
        EtcdInstaller.start(cluster_name, client)
        EtcdInstaller.wait_for_cluster(cluster_name, client, client_port=client_port)

        EtcdInstaller._logger.debug('Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))
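Usage is a single static call; the cluster name and IP below are hypothetical, and the ports fall back to the DEFAULT_SERVER_PORT/DEFAULT_CLIENT_PORT constants from the signature when omitted:

    EtcdInstaller.create_cluster('config', '10.100.1.1')
    # or with the standard etcd ports made explicit
    EtcdInstaller.create_cluster('config', '10.100.1.1', server_port=2380, client_port=2379)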
Example #23
    def is_running(cluster_name, client):
        """
        Checks if arakoon service is running
        :param cluster_name: The name of the cluster service to check
        :type cluster_name: str

        :param client: Client on which to check the service
        :type client: SSHClient

        :return: True if the arakoon service is running, False otherwise
        """
        if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client):
            return ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client)
        return False
Example #24
    def is_running(cluster_name, client):
        """
        Checks if arakoon service is running
        :param cluster_name: The name of the cluster service to check
        :type cluster_name: str

        :param client: Client on which to check the service
        :type client: SSHClient

        :return: True if the arakoon service is running, False otherwise
        """
        if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client):
            return ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client)[0]
        return False
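Examples #23 and #24 differ only in the trailing [0]: across the framework versions sampled here, ServiceManager.get_service_status returns a plain boolean in some snippets (Examples #2, #10, #23) and a (status, output) tuple in others (Examples #3, #16, #18, #24). Code that must tolerate both could normalize the result with a shim like this (hypothetical helper, not part of the original API):

    def service_is_running(name, client):
        result = ServiceManager.get_service_status(name, client=client)
        if isinstance(result, tuple):
            return result[0]  # newer API: (status, output)
        return result  # older API: plain bool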
Example #25
 def _is_cinder_running(self):
     if self.is_devstack:
         try:
             return 'cinder-volume' in str(
                 self.client.run(
                     'ps aux | grep cinder-volume | grep -v grep'))
         except SystemExit:
             return False
     if self.is_openstack:
         try:
             cinder_service = OSManager.get_openstack_cinder_service_name()
             return ServiceManager.get_service_status(
                 cinder_service, self.client)
         except SystemExit:
             return False
     return False
Example #26
    def change_service_state(client, name, state, logger=None):
        """
        Starts/stops/restarts a service
        :param client: SSHClient on which to connect and change service state
        :param name: Name of the service
        :param state: State to put the service in
        :param logger: LogHandler Object
        """
        action = None
        # Enable service before changing the state
        status = ServiceManager.is_enabled(name, client=client)
        if status is False:
            if logger is not None:
                logger.debug('  {0:<15} - Enabling service {1}'.format(
                    client.ip, name))
            ServiceManager.enable_service(name, client=client)

        status = ServiceManager.get_service_status(name, client=client)
        if status is False and state in ['start', 'restart']:
            if logger is not None:
                logger.debug('  {0:<15} - Starting service {1}'.format(
                    client.ip, name))
            ServiceManager.start_service(name, client=client)
            action = 'started'
        elif status is True and state == 'stop':
            if logger is not None:
                logger.debug('  {0:<15} - Stopping service {1}'.format(
                    client.ip, name))
            ServiceManager.stop_service(name, client=client)
            action = 'stopped'
        elif status is True and state == 'restart':
            if logger is not None:
                logger.debug('  {0:<15} - Restarting service {1}'.format(
                    client.ip, name))
            ServiceManager.restart_service(name, client=client)
            action = 'restarted'

        if action is None:
            print '  [{0}] {1} already {2}'.format(
                client.ip, name, 'running' if status is True else 'halted')
        else:
            if logger is not None:
                logger.debug('  {0:<15} - Service {1} {2}'.format(
                    client.ip, name, action))
            print '  [{0}] {1} {2}'.format(client.ip, name, action)
Example #27
    def _setup_proxy(initial_cluster, slave_client, cluster_name, force=False, client_port=DEFAULT_CLIENT_PORT):
        base_name = 'ovs-etcd-proxy'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        if force is False and ServiceManager.has_service(target_name, slave_client) and \
                ServiceManager.get_service_status(target_name, slave_client)[0] is True:
            EtcdInstaller._logger.info('Service {0} already configured and running'.format(target_name))
            return
        EtcdInstaller.stop(cluster_name, slave_client)

        data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
        abs_paths = [data_dir, wal_dir]
        slave_client.dir_delete(abs_paths)
        slave_client.dir_create(data_dir)
        slave_client.dir_chmod(data_dir, 0755, recursive=True)
        slave_client.dir_chown(data_dir, 'ovs', 'ovs', recursive=True)

        ServiceManager.add_service(base_name, slave_client,
                                   params={'CLUSTER': cluster_name,
                                           'DATA_DIR': data_dir,
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                           'INITIAL_CLUSTER': initial_cluster},
                                   target_name=target_name)
        EtcdInstaller.start(cluster_name, slave_client)
        EtcdInstaller.wait_for_cluster(cluster_name, slave_client, client_port=client_port)
Example #28
    def validate_vpool_sanity(expected_settings):
        """
        Check if all requirements are met for a healthy vPool
        :param expected_settings: Parameters used to create a vPool, which will be verified
        :type expected_settings: dict

        :return: None
        """
        if not isinstance(expected_settings, dict) or len(expected_settings) == 0:
            raise ValueError("Cannot validate vpool when no settings are passed")

        generic_settings = expected_settings.values()[0]
        vpool_name = generic_settings["vpool_name"]
        mountpoint = "/mnt/{0}".format(vpool_name)
        backend_type = generic_settings["type"]
        rdma_enabled = (
            generic_settings["config_params"]["dtl_transport"] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET
        )

        vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
        assert vpool is not None, "Could not find vPool with name {0}".format(vpool_name)
        vpool_config = GeneralVPool.get_configuration(vpool)

        # Verify some basic vPool attributes
        assert vpool.name == vpool_name, "Expected name {0} for vPool".format(vpool_name)
        assert vpool.status == VPool.STATUSES.RUNNING, "vPool does not have RUNNING status"
        assert vpool.rdma_enabled == rdma_enabled, "RDMA enabled setting is incorrect"
        assert set(expected_settings.keys()) == set(
            [sd.storagerouter for sd in vpool.storagedrivers]
        ), "vPool storagerouters don't match the expected Storage Routers"

        # Verify vPool Storage Driver configuration
        expected_vpool_config = copy.deepcopy(generic_settings["config_params"])
        for key, value in vpool_config.iteritems():
            if key == "dtl_enabled" or key == "tlog_multiplier" or key == "dtl_config_mode":
                continue
            if key not in expected_vpool_config:
                raise ValueError("Expected settings does not contain key {0}".format(key))

            if value != expected_vpool_config[key]:
                raise ValueError(
                    "vPool does not have expected configuration {0} for key {1}".format(expected_vpool_config[key], key)
                )
            expected_vpool_config.pop(key)

        if len(expected_vpool_config) > 0:
            raise ValueError(
                "Actual vPool configuration does not contain keys: {0}".format(", ".join(expected_vpool_config.keys()))
            )

        # Prepare some fields to check
        config = generic_settings["config_params"]
        dtl_mode = config["dtl_mode"]
        sco_size = config["sco_size"]
        cluster_size = config["cluster_size"]
        write_buffer = config["write_buffer"]
        dtl_transport = config["dtl_transport"]
        # @TODO: Add more validations for other expected settings (instead of None)
        expected_config = {
            "backend_connection_manager": {
                "backend_interface_retries_on_error": 5,
                "backend_interface_retry_interval_secs": 1,
                "backend_interface_retry_backoff_multiplier": 2.0,
            },
            "content_addressed_cache": {
                "clustercache_mount_points": None,
                "read_cache_serialization_path": u"/var/rsp/{0}".format(vpool.name),
            },
            "distributed_lock_store": {
                "dls_arakoon_cluster_id": None,
                "dls_arakoon_cluster_nodes": None,
                "dls_type": u"Arakoon",
            },
            "distributed_transaction_log": {"dtl_path": None, "dtl_transport": dtl_transport.upper()},
            "event_publisher": {"events_amqp_routing_key": u"volumerouter", "events_amqp_uris": None},
            "file_driver": {"fd_cache_path": None, "fd_extent_cache_capacity": u"1024", "fd_namespace": None},
            "filesystem": {
                "fs_dtl_config_mode": u"Automatic",
                "fs_dtl_mode": u"{0}".format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
                "fs_enable_shm_interface": 1,
                "fs_file_event_rules": None,
                "fs_metadata_backend_arakoon_cluster_nodes": None,
                "fs_metadata_backend_mds_nodes": None,
                "fs_metadata_backend_type": u"MDS",
                "fs_raw_disk_suffix": None,
                "fs_virtual_disk_format": None,
            },
            "metadata_server": {"mds_nodes": None},
            "scocache": {"backoff_gap": u"2GB", "scocache_mount_points": None, "trigger_gap": u"1GB"},
            "threadpool_component": {"num_threads": 16},
            "volume_manager": {
                "clean_interval": 1,
                "default_cluster_size": 1024 * cluster_size,
                "dtl_throttle_usecs": 4000,
                "metadata_path": None,
                "non_disposable_scos_factor": float(write_buffer)
                / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size]
                / sco_size,
                "number_of_scos_in_tlog": StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
                "tlog_path": None,
            },
            "volume_registry": {"vregistry_arakoon_cluster_id": u"voldrv", "vregistry_arakoon_cluster_nodes": None},
            "volume_router": {
                "vrouter_backend_sync_timeout_ms": 5000,
                "vrouter_file_read_threshold": 1024,
                "vrouter_file_write_threshold": 1024,
                "vrouter_id": None,
                "vrouter_max_workers": 16,
                "vrouter_migrate_timeout_ms": 5000,
                "vrouter_min_workers": 4,
                "vrouter_redirect_timeout_ms": u"5000",
                "vrouter_routing_retries": 10,
                "vrouter_sco_multiplier": 1024,
                "vrouter_volume_read_threshold": 1024,
                "vrouter_volume_write_threshold": 1024,
            },
            "volume_router_cluster": {"vrouter_cluster_id": None},
        }
        vpool_services = {
            "all": [
                "ovs-watcher-volumedriver",
                "ovs-dtl_{0}".format(vpool.name),
                "ovs-volumedriver_{0}".format(vpool.name),
                "ovs-volumerouter-consumer",
            ],
            "extra": [],
            "master": ["ovs-arakoon-voldrv"],
        }
        sd_partitions = {"DB": ["MD", "MDS", "TLOG"], "WRITE": ["FD", "DTL", "SCO"]}

        assert Configuration.exists("/ovs/arakoon/voldrv/config", raw=True), "Volumedriver arakoon does not exist"

        # Do some verifications for all SDs
        storage_ip = None
        voldrv_config = GeneralArakoon.get_config("voldrv")
        all_files = GeneralVPool.get_related_files(vpool=vpool)
        all_directories = GeneralVPool.get_related_directories(vpool=vpool)

        for storagedriver in vpool.storagedrivers:
            storagerouter = storagedriver.storagerouter
            root_client = SSHClient(storagerouter, username="******")

            assert Configuration.exists(
                "/ovs/vpools/{0}/hosts/{1}/config".format(vpool.guid, storagedriver.storagedriver_id), raw=True
            ), "vPool config not found in configuration"
            # @todo: replace next lines with implementation defined in: http://jira.openvstorage.com/browse/OVS-4577
            # current_config_sections = set([item for item in Configuration.list('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id))])
            # assert not current_config_sections.difference(set(expected_config.keys())), 'New section appeared in the storage driver config in configuration'
            # assert not set(expected_config.keys()).difference(current_config_sections), 'Config section expected for storage driver, but not found in configuration'
            #
            # for key, values in expected_config.iteritems():
            #     current_config = Configuration.get('/ovs/vpools/{0}/hosts/{1}/config/{2}'.format(vpool.guid, storagedriver.storagedriver_id, key))
            #     assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)
            #
            #     for sub_key, value in current_config.iteritems():
            #         expected_value = values[sub_key]
            #         if expected_value is None:
            #             continue
            #         assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

            # Check services
            if storagerouter.node_type == "MASTER":
                for service_name in vpool_services["all"] + vpool_services["master"]:
                    if (
                        service_name == "ovs-arakoon-voldrv"
                        and GeneralStorageDriver.has_role(storagedriver, "DB") is False
                    ):
                        continue
                    status, output = ServiceManager.get_service_status(name=service_name, client=root_client)
                    if status is not True:
                        raise ValueError(
                            "Service {0} is not running on node {1} - {2}".format(
                                service_name, storagerouter.ip, output
                            )
                        )
            else:
                for service_name in vpool_services["all"] + vpool_services["extra"]:
                    status, output = ServiceManager.get_service_status(name=service_name, client=root_client)
                    if status is not True:
                        raise ValueError(
                            "Service {0} is not running on node {1} - {2}".format(
                                service_name, storagerouter.ip, output
                            )
                        )

            # Check arakoon config
            if not voldrv_config.has_section(storagerouter.machine_id):
                raise ValueError("Voldrv arakoon cluster does not have section {0}".format(storagerouter.machine_id))

            # Basic SD checks
            assert (
                storagedriver.cluster_ip == storagerouter.ip
            ), "Incorrect cluster IP. Expected: {0}  -  Actual: {1}".format(storagerouter.ip, storagedriver.cluster_ip)
            assert storagedriver.mountpoint == "/mnt/{0}".format(
                vpool.name
            ), "Incorrect mountpoint. Expected: {0}  -  Actual: {1}".format(mountpoint, storagedriver.mountpoint)
            if storage_ip is not None:
                assert (
                    storagedriver.storage_ip == storage_ip
                ), "Incorrect storage IP. Expected: {0}  -  Actual: {1}".format(storage_ip, storagedriver.storage_ip)
            storage_ip = storagedriver.storage_ip

            # Check required directories and files
            if storagerouter.guid not in all_directories:
                raise ValueError("Could not find directory information for Storage Router {0}".format(storagerouter.ip))
            if storagerouter.guid not in all_files:
                raise ValueError("Could not find file information for Storage Router {0}".format(storagerouter.ip))

            for directory in all_directories[storagerouter.guid]:
                if root_client.dir_exists(directory) is False:
                    raise ValueError(
                        "Directory {0} does not exist on Storage Router {1}".format(directory, storagerouter.ip)
                    )
            for file_name in all_files[storagerouter.guid]:
                if root_client.file_exists(file_name) is False:
                    raise ValueError(
                        "File {0} does not exist on Storage Router {1}".format(file_name, storagerouter.ip)
                    )

            # @TODO: check roles and sub_roles for all storagedrivers and not just once
            for partition in storagedriver.partitions:
                if partition.role in sd_partitions and partition.sub_role in sd_partitions[partition.role]:
                    sd_partitions[partition.role].remove(partition.sub_role)
                elif (
                    partition.role in sd_partitions
                    and partition.sub_role is None
                    and len(sd_partitions[partition.role])
                ):
                    sd_partitions[partition.role].remove("None")

            # Verify vPool writeable
            if GeneralHypervisor.get_hypervisor_type() == "VMWARE":
                GeneralVPool.mount_vpool(vpool=vpool, root_client=root_client)

            vdisk = GeneralVDisk.create_volume(size=10, vpool=vpool, root_client=root_client)
            GeneralVDisk.write_to_volume(
                vdisk=vdisk, vpool=vpool, root_client=root_client, count=10, bs="1M", input_type="random"
            )
            GeneralVDisk.delete_volume(vdisk=vdisk, vpool=vpool, root_client=root_client)

        for role, sub_roles in sd_partitions.iteritems():
            for sub_role in sub_roles:
                raise ValueError(
                    "Not a single Storage Driver found with partition role {0} and sub-role {1}".format(role, sub_role)
                )
Example #29
 def start(cluster_name, client):
     """
     Starts an arakoon cluster
     """
     if ServiceManager.get_service_status("arakoon-{0}".format(cluster_name), client=client) is False:
         ServiceManager.start_service("arakoon-{0}".format(cluster_name), client=client)
Example #30
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should all belong to a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: a list of error messages
        :rtype: list
        """

        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
                if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                    port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service, client=client)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

                backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service, client=client)
                ServiceManager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                            MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(work_unit=work_unit,
                                                          scratch_dir=scrub_directory,
                                                          log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                          backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service, client=client):
                    ServiceManager.stop_service(alba_proxy_service, client=client)
                    ServiceManager.remove_service(alba_proxy_service, client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
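
The `except Empty` handler above is the normal exit path: the worker keeps calling `queue.get(False)` until the queue raises `Empty`. A minimal, self-contained sketch of that drain pattern (Python 2 stdlib; the queue contents and handler are illustrative):

    from Queue import Queue, Empty  # stdlib; named 'queue' on Python 3

    def drain(work_queue, handle):
        """Process items until the queue raises Empty."""
        try:
            while True:
                handle(work_queue.get(False))  # non-blocking get
        except Empty:  # raised when all items have been fetched
            pass

    q = Queue()
    for guid in ['guid-1', 'guid-2', 'guid-3']:
        q.put(guid)
    drain(q, lambda guid: None)  # stand-in for the real scrub work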
Example #31
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            sshclient = SSHClient(storage_driver.storagerouter)
                            # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                            if ServiceManager.get_service_status('workers', sshclient) is False:
                                ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip))
                            else:
                                scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        if len(vdisk_guids) == 0:
            ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
            return

        ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = {}
        storage_router_list = []
        scrub_map = {}

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                                              vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                storage_router_list.append(storage_router)
                scrub_map[storage_router.ip] = vdisk_guids_to_scrub

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))

        all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                       timesleep=60)  # Check every 60 seconds if tasks are still running

        for ip, result in all_results.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

        result_set = {}
        for failed_node in failed_nodes:
            ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node))
            vdisk_guids_to_scrub = scrub_map[failed_node]
            rescheduled_work = False
            for storage_router, scrub_location in scrub_locations.items():
                if storage_router.ip not in failed_nodes:
                    if storage_router.machine_id != local_machineid:
                        ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip))
                        result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location,
                                                                                                      vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                        storage_router_list.append(storage_router)
                        rescheduled_work = True
                        break
            if rescheduled_work is False:
                if local_scrub_location is not None:
                    try:
                        processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                                           vdisk_guids=vdisk_guids_to_scrub))
                    except Exception as ex:
                        ScheduledTaskController._logger.error('Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex))
                else:
                    ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node))

        if len(result_set) > 0:
            all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                            timesleep=60)  # Check every 60 seconds if tasks are still running

            for ip, result in all_results2.iteritems():
                if isinstance(result, list):
                    processed_guids.extend(result)
                else:
                    ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

        if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        ScheduledTaskController._logger.info('Gather Scrub - Finished')
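
The distribution loop above splits the guid list into consecutive slices, one per scrub location, via integer division on the running index. A short sketch of the same arithmetic, assuming Python 2 floor division and made-up sample data:

    guids = ['g1', 'g2', 'g3', 'g4', 'g5', 'g6', 'g7', 'g8']
    locations = ['sr1', 'sr2', 'sr3']

    for index, location in enumerate(locations):
        start = index * len(guids) / len(locations)        # floor division on Python 2 ints
        end = (index + 1) * len(guids) / len(locations)
        print location, guids[start:end]
    # sr1 ['g1', 'g2'] - sr2 ['g3', 'g4', 'g5'] - sr3 ['g6', 'g7', 'g8']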
Example #32
    def getStatusOfService(self, service_name):
        return ServiceManager.get_service_status(str(service_name), self.client)
Example #33
    def is_host_configured(self, ip):
        if (self._is_devstack is False and self._is_openstack is False) or self._cinder_installed is False or self._nova_installed is False:
            self._logger.warning('Host configured: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed')
            return False

        # 1. Check driver code
        if self._is_devstack is True:
            if not self.client.file_exists(filename=self._devstack_driver):
                self._logger.info('  File "{0}" does not exist'.format(self._devstack_driver))
                return False
        else:
            if not self.client.file_exists(filename='{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location)):
                self._logger.info('  File "{0}/cinder/volume/drivers/openvstorage.py" does not exist'.format(self._driver_location))
                return False

        # 2. Check configured users
        ovs_id = self.client.run('id -u ovs')
        if not ovs_id:
            self._logger.info('Failed to determine the OVS user group ID')
            return False

        users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users
        for user in users:
            if '{0}(ovs)'.format(ovs_id) not in self.client.run('id -a {0}'.format(user)):
                self._logger.info('User "{0}" is not part of the OVS user group'.format(user))
                return False

        # 3. Check patches
        nova_base_path = self._get_base_path('nova')
        cinder_base_path = self._get_base_path('cinder')
        if self._stack_version in ('liberty', 'mitaka', 'newton'):
            try:
                import os_brick
                cinder_brick_initiator_file = "{0}/initiator/connector.py".format(os.path.dirname(os_brick.__file__))
            except ImportError:
                cinder_brick_initiator_file = ''
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format(nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format(self._driver_location)
        else:
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location)
            cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(cinder_base_path)

        if self._is_devstack is True:
            nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path)
        else:
            nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location)

        file_contents = self.client.file_read(nova_volume_file)
        if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents:
            self._logger.info('File "{0}" is not configured properly'.format(nova_volume_file))
            return False

        if self._stack_version in ('liberty', 'mitaka'):
            check_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver'
        else:
            check_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'

        file_contents = self.client.file_read(nova_driver_file)
        if check_line not in file_contents:
            self._logger.info('File "{0}" is not configured properly'.format(nova_driver_file))
            return False

        if os.path.exists(cinder_brick_initiator_file):
            file_contents = self.client.file_read(cinder_brick_initiator_file)
            if self._stack_version in ('liberty', 'mitaka', 'newton'):
                if 'elif protocol in [LOCAL, "FILE"]:' not in file_contents:
                    self._logger.info('File "{0}" is not configured properly'.format(cinder_brick_initiator_file))
                    return False
            else:
                if 'elif protocol in ["LOCAL", "FILE"]:' not in file_contents:
                    self._logger.info('File "{0}" is not configured properly'.format(cinder_brick_initiator_file))
                    return False

        # 4. Check messaging driver configuration
        nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
        cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'

        host_configured = True
        with remote(ip, [RawConfigParser], 'root') as rem:
            for config_file, driver in {self._NOVA_CONF: nova_messaging_driver,
                                        self._CINDER_CONF: cinder_messaging_driver}.iteritems():
                cfg = rem.RawConfigParser()
                cfg.read([config_file])
                host_configured &= cfg.get("DEFAULT", "notification_driver") == driver
                host_configured &= "notifications" in cfg.get("DEFAULT", "notification_topics")

                if config_file == self._NOVA_CONF:
                    host_configured &= cfg.get("DEFAULT", "notify_on_any_change") == "True"
                    host_configured &= cfg.get("DEFAULT", "notify_on_state_change") == "vm_and_task_state"

        if host_configured is False:
            self._logger.info('Nova and/or Cinder configuration files are not configured properly')
            return host_configured

        # 5. Check events consumer service
        service_name = 'ovs-openstack-events-consumer'
        if not (ServiceManager.has_service(service_name, self.client) and ServiceManager.get_service_status(service_name, self.client) is True):
            self._logger.info('Service "{0}" is not configured properly'.format(service_name))
            return False

        return True
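
The messaging-driver check above reduces to parsing two INI files with RawConfigParser and comparing a few DEFAULT options. A standalone sketch of that verification, assuming a local file path instead of the remote() wrapper (the path and expected values are illustrative):

    from ConfigParser import RawConfigParser  # stdlib; named 'configparser' on Python 3

    def messaging_driver_ok(config_file, expected_driver):
        cfg = RawConfigParser()
        cfg.read([config_file])  # read() silently skips files it cannot open
        ok = cfg.get('DEFAULT', 'notification_driver') == expected_driver
        ok &= 'notifications' in cfg.get('DEFAULT', 'notification_topics')
        return ok

    # Hypothetical usage; the path is an assumption, not taken from the example above
    print messaging_driver_ok('/etc/cinder/cinder.conf', 'messaging')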
Example #34
    def validate_vpool_sanity(expected_settings):
        """
        Check if all requirements are met for a healthy vPool
        :param expected_settings: Parameters used to create a vPool, which will be verified
        :type expected_settings: dict

        :return: None
        """
        if not isinstance(expected_settings, dict) or len(expected_settings) == 0:
            raise ValueError('Cannot validate vpool when no settings are passed')

        generic_settings = expected_settings.values()[0]
        vpool_name = generic_settings['vpool_name']
        mountpoint = '/mnt/{0}'.format(vpool_name)
        backend_type = generic_settings['type']
        rdma_enabled = generic_settings['config_params']['dtl_transport'] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET

        vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
        assert vpool is not None, 'Could not find vPool with name {0}'.format(vpool_name)
        vpool_config = GeneralVPool.get_configuration(vpool)

        # Verify some basic vPool attributes
        assert vpool.name == vpool_name, 'Expected name {0} for vPool'.format(vpool_name)
        assert vpool.backend_type.code == backend_type, 'Expected backend type {0}'.format(backend_type)
        assert vpool.status == VPool.STATUSES.RUNNING, 'vPool does not have RUNNING status'
        assert vpool.rdma_enabled == rdma_enabled, 'RDMA enabled setting is incorrect'
        assert set(expected_settings.keys()) == set([sd.storagerouter for sd in vpool.storagedrivers]), "vPool storagerouters don't match the expected Storage Routers"

        # Verify vPool Storage Driver configuration
        expected_vpool_config = copy.deepcopy(generic_settings['config_params'])
        for key, value in vpool_config.iteritems():
            if key == 'dtl_enabled' or key == 'tlog_multiplier':
                continue
            if key not in expected_vpool_config:
                raise ValueError('Expected settings does not contain key {0}'.format(key))

            if value != expected_vpool_config[key]:
                raise ValueError('vPool does not have expected configuration {0} for key {1}'.format(expected_vpool_config[key], key))
            expected_vpool_config.pop(key)

        if len(expected_vpool_config) > 0:
            raise ValueError('Actual vPool configuration does not contain keys: {0}'.format(', '.join(expected_vpool_config.keys())))

        # Prepare some fields to check
        config = generic_settings['config_params']
        dtl_mode = config['dtl_mode']
        sco_size = config['sco_size']
        dedupe_mode = config['dedupe_mode']
        cluster_size = config['cluster_size']
        write_buffer = config['write_buffer']
        dtl_transport = config['dtl_transport']
        cache_strategy = config['cache_strategy']
        # @TODO: Add more validations for other expected settings (instead of None)
        expected_config = {'backend_connection_manager': {'backend_interface_retries_on_error': 5,
                                                          'backend_interface_retry_interval_secs': 1,
                                                          'backend_interface_retry_backoff_multiplier': 2.0},
                           'content_addressed_cache': {'clustercache_mount_points': None,
                                                       'read_cache_serialization_path': u'/var/rsp/{0}'.format(vpool.name)},
                           'distributed_lock_store': {'dls_arakoon_cluster_id': None,
                                                      'dls_arakoon_cluster_nodes': None,
                                                      'dls_type': u'Arakoon'},
                           'distributed_transaction_log': {'dtl_path': None,
                                                           'dtl_transport': dtl_transport.upper()},
                           'event_publisher': {'events_amqp_routing_key': u'volumerouter',
                                               'events_amqp_uris': None},
                           'file_driver': {'fd_cache_path': None,
                                           'fd_extent_cache_capacity': u'1024',
                                           'fd_namespace': None},
                           'filesystem': {'fs_dtl_config_mode': u'Automatic',
                                          'fs_dtl_mode': u'{0}'.format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
                                          'fs_enable_shm_interface': 1,
                                          'fs_file_event_rules': None,
                                          'fs_metadata_backend_arakoon_cluster_nodes': None,
                                          'fs_metadata_backend_mds_nodes': None,
                                          'fs_metadata_backend_type': u'MDS',
                                          'fs_raw_disk_suffix': None,
                                          'fs_virtual_disk_format': None},
                           'metadata_server': {'mds_nodes': None},
                           'scocache': {'backoff_gap': u'2GB',
                                        'scocache_mount_points': None,
                                        'trigger_gap': u'1GB'},
                           'threadpool_component': {'num_threads': 16},
                           'volume_manager': {'clean_interval': 1,
                                              'default_cluster_size': 1024 * cluster_size,
                                              'dtl_throttle_usecs': 4000,
                                              'metadata_path': None,
                                              'non_disposable_scos_factor': float(write_buffer) / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size] / sco_size,
                                              'number_of_scos_in_tlog': StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
                                              'read_cache_default_behaviour': StorageDriverClient.VPOOL_CACHE_MAP[cache_strategy],
                                              'read_cache_default_mode': StorageDriverClient.VPOOL_DEDUPE_MAP[dedupe_mode],
                                              'tlog_path': None},
                           'volume_registry': {'vregistry_arakoon_cluster_id': u'voldrv',
                                               'vregistry_arakoon_cluster_nodes': None},
                           'volume_router': {'vrouter_backend_sync_timeout_ms': 5000,
                                             'vrouter_file_read_threshold': 1024,
                                             'vrouter_file_write_threshold': 1024,
                                             'vrouter_id': None,
                                             'vrouter_max_workers': 16,
                                             'vrouter_migrate_timeout_ms': 5000,
                                             'vrouter_min_workers': 4,
                                             'vrouter_redirect_timeout_ms': u'5000',
                                             'vrouter_routing_retries': 10,
                                             'vrouter_sco_multiplier': 1024,
                                             'vrouter_volume_read_threshold': 1024,
                                             'vrouter_volume_write_threshold': 1024},
                           'volume_router_cluster': {'vrouter_cluster_id': None}}
        vpool_services = {'all': ['ovs-watcher-volumedriver',
                                  'ovs-dtl_{0}'.format(vpool.name),
                                  'ovs-volumedriver_{0}'.format(vpool.name),
                                  'ovs-volumerouter-consumer'],
                          'extra': [],
                          'master': ['ovs-arakoon-voldrv']}
        sd_partitions = {'DB': ['MD', 'MDS', 'TLOG'],
                         'READ': ['None'],
                         'WRITE': ['FD', 'DTL', 'SCO'],
                         'SCRUB': ['None']}

        if backend_type == 'alba':
            backend_metadata = {'name': (str, None),
                                'preset': (str, Toolbox.regex_preset),
                                'backend_guid': (str, Toolbox.regex_guid),
                                'arakoon_config': (dict, None),
                                'connection': (dict, {'host': (str, Toolbox.regex_ip, False),
                                                      'port': (int, {'min': 1, 'max': 65535}),
                                                      'client_id': (str, Toolbox.regex_guid),
                                                      'client_secret': (str, None),
                                                      'local': (bool, None)}),
                                'backend_info': (dict, {'policies': (list, None),
                                                        'sco_size': (float, None),
                                                        'frag_size': (float, None),
                                                        'total_size': (float, None),
                                                        'nsm_partition_guids': (list, Toolbox.regex_guid)})}
            required = {'backend': (dict, backend_metadata),
                        'backend_aa': (dict, backend_metadata, False)}
            Toolbox.verify_required_params(required_params=required,
                                           actual_params=vpool.metadata)
            vpool_services['all'].append("ovs-albaproxy_{0}".format(vpool.name))
            sd_partitions['WRITE'].append('FCACHE')
            expected_config['backend_connection_manager'].update({'alba_connection_host': None,
                                                                  'alba_connection_port': None,
                                                                  'alba_connection_preset': None,
                                                                  'alba_connection_timeout': 15,
                                                                  'backend_type': u'{0}'.format(vpool.backend_type.code.upper())})
        elif backend_type == 'distributed':
            expected_config['backend_connection_manager'].update({'backend_type': u'LOCAL',
                                                                  'local_connection_path': u'{0}'.format(generic_settings['distributed_mountpoint'])})

        assert EtcdConfiguration.exists('/ovs/arakoon/voldrv/config', raw=True), 'Volumedriver arakoon does not exist'

        # Do some verifications for all SDs
        storage_ip = None
        voldrv_config = GeneralArakoon.get_config('voldrv')
        all_files = GeneralVPool.get_related_files(vpool=vpool)
        all_directories = GeneralVPool.get_related_directories(vpool=vpool)

        for storagedriver in vpool.storagedrivers:
            storagerouter = storagedriver.storagerouter
            root_client = SSHClient(storagerouter, username='******')

            assert EtcdConfiguration.exists('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id), raw=True), 'vPool config not found in etcd'
            current_config_sections = set([item for item in EtcdConfiguration.list('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id))])
            assert not current_config_sections.difference(set(expected_config.keys())), 'New section appeared in the storage driver config in etcd'
            assert not set(expected_config.keys()).difference(current_config_sections), 'Config section expected for storage driver, but not found in etcd'

            for key, values in expected_config.iteritems():
                current_config = EtcdConfiguration.get('/ovs/vpools/{0}/hosts/{1}/config/{2}'.format(vpool.guid, storagedriver.storagedriver_id, key))
                assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)

                for sub_key, value in current_config.iteritems():
                    expected_value = values[sub_key]
                    if expected_value is None:
                        continue
                    assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

            # Check services
            if storagerouter.node_type == 'MASTER':
                for service_name in vpool_services['all'] + vpool_services['master']:
                    if service_name == 'ovs-arakoon-voldrv' and GeneralStorageDriver.has_role(storagedriver, 'DB') is False:
                        continue
                    if ServiceManager.get_service_status(name=service_name,
                                                         client=root_client) is not True:
                        raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))
            else:
                for service_name in vpool_services['all'] + vpool_services['extra']:
                    if ServiceManager.get_service_status(name=service_name,
                                                         client=root_client) is not True:
                        raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))

            # Check arakoon config
            if not voldrv_config.has_section(storagerouter.machine_id):
                raise ValueError('Voldrv arakoon cluster does not have section {0}'.format(storagerouter.machine_id))

            # Basic SD checks
            assert storagedriver.cluster_ip == storagerouter.ip, 'Incorrect cluster IP. Expected: {0}  -  Actual: {1}'.format(storagerouter.ip, storagedriver.cluster_ip)
            assert storagedriver.mountpoint == '/mnt/{0}'.format(vpool.name), 'Incorrect mountpoint. Expected: {0}  -  Actual: {1}'.format(mountpoint, storagedriver.mountpoint)
            if storage_ip is not None:
                assert storagedriver.storage_ip == storage_ip, 'Incorrect storage IP. Expected: {0}  -  Actual: {1}'.format(storage_ip, storagedriver.storage_ip)
            storage_ip = storagedriver.storage_ip

            # Check required directories and files
            if storagerouter.guid not in all_directories:
                raise ValueError('Could not find directory information for Storage Router {0}'.format(storagerouter.ip))
            if storagerouter.guid not in all_files:
                raise ValueError('Could not find file information for Storage Router {0}'.format(storagerouter.ip))

            for directory in all_directories[storagerouter.guid]:
                if root_client.dir_exists(directory) is False:
                    raise ValueError('Directory {0} does not exist on Storage Router {1}'.format(directory, storagerouter.ip))
            for file_name in all_files[storagerouter.guid]:
                if root_client.file_exists(file_name) is False:
                    raise ValueError('File {0} does not exist on Storage Router {1}'.format(file_name, storagerouter.ip))

            for partition in storagedriver.partitions:
                if partition.role in sd_partitions and partition.sub_role in sd_partitions[partition.role]:
                    sd_partitions[partition.role].remove(partition.sub_role)
                elif partition.role in sd_partitions and partition.sub_role is None:
                    sd_partitions[partition.role].remove('None')

            # Verify vPool writeable
            if storagerouter.pmachine.hvtype == 'VMWARE':
                GeneralVPool.mount_vpool(vpool=vpool,
                                         root_client=root_client)

            vdisk = GeneralVDisk.create_volume(size=10,
                                               vpool=vpool,
                                               root_client=root_client)
            GeneralVDisk.write_to_volume(vdisk=vdisk,
                                         vpool=vpool,
                                         root_client=root_client,
                                         count=10,
                                         bs='1M',
                                         input_type='random')
            GeneralVDisk.delete_volume(vdisk=vdisk,
                                       vpool=vpool,
                                       root_client=root_client)

        for role, sub_roles in sd_partitions.iteritems():
            for sub_role in sub_roles:
                raise ValueError('Not a single Storage Driver found with partition role {0} and sub-role {1}'.format(role, sub_role))
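
The partition verification above is a checklist: every expected role/sub-role pair starts out in a dict and is crossed off as a matching Storage Driver partition is seen, so whatever remains is missing. A compact sketch of the idea with made-up partition data:

    expected = {'DB': ['MD', 'MDS', 'TLOG'],
                'WRITE': ['FD', 'DTL', 'SCO']}
    seen = [('DB', 'MD'), ('DB', 'MDS'), ('DB', 'TLOG'),
            ('WRITE', 'FD'), ('WRITE', 'DTL')]  # 'SCO' deliberately absent

    for role, sub_role in seen:
        if role in expected and sub_role in expected[role]:
            expected[role].remove(sub_role)

    for role, sub_roles in expected.iteritems():
        for sub_role in sub_roles:
            print 'Missing: role {0}, sub-role {1}'.format(role, sub_role)
    # -> Missing: role WRITE, sub-role SCO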
Example #35
    def dtl_checkup(vpool_guid=None, vdisk_guid=None, storagerouters_to_exclude=None):
        """
        Check DTL for all volumes
        :param vpool_guid:                vPool to check the DTL configuration of all its disks
        :type vpool_guid:                 String

        :param vdisk_guid:                Virtual Disk to check its DTL configuration
        :type vdisk_guid:                 String

        :param storagerouters_to_exclude: Storage Routers to exclude from possible targets
        :type storagerouters_to_exclude:  List

        :return:                          None
        """
        if vpool_guid is not None and vdisk_guid is not None:
            raise ValueError('vpool and vdisk are mutually exclusive')
        if storagerouters_to_exclude is None:
            storagerouters_to_exclude = []

        from ovs.lib.vpool import VPoolController

        logger.info('DTL checkup started')
        required_params = {'dtl_mode': (str, StorageDriverClient.VPOOL_DTL_MODE_MAP.keys()),
                           'dtl_enabled': (bool, None)}
        vdisk = VDisk(vdisk_guid) if vdisk_guid else None
        vpool = VPool(vpool_guid) if vpool_guid else None
        errors_found = False
        root_client_map = {}
        vpool_dtl_config_cache = {}
        if vdisk is not None:
            vdisks = [vdisk]
        elif vpool is not None:
            vdisks = vpool.vdisks
        else:
            vdisks = VDiskList.get_vdisks()
        for vdisk in vdisks:
            logger.info('    Verifying vDisk {0} with guid {1}'.format(vdisk.name, vdisk.guid))
            vdisk.invalidate_dynamics(['storagedriver_client', 'storagerouter_guid'])
            if vdisk.storagedriver_client is None:
                continue

            vpool = vdisk.vpool
            if vpool.guid not in vpool_dtl_config_cache:
                vpool_config = VPoolController.get_configuration(vpool.guid)  # Config on vPool is permanent for DTL settings
                vpool_dtl_config_cache[vpool.guid] = vpool_config
                Toolbox.verify_required_params(required_params, vpool_config)

            volume_id = str(vdisk.volume_id)
            vpool_config = vpool_dtl_config_cache[vpool.guid]
            dtl_vpool_enabled = vpool_config['dtl_enabled']
            try:
                current_dtl_config = vdisk.storagedriver_client.get_dtl_config(volume_id)
                current_dtl_config_mode = vdisk.storagedriver_client.get_dtl_config_mode(volume_id)
            except RuntimeError as rte:
                # Can occur when a volume has not been stolen yet from a dead node
                logger.error('Retrieving DTL configuration from storage driver failed with error: {0}'.format(rte))
                errors_found = True
                continue

            if dtl_vpool_enabled is False and (current_dtl_config is None or current_dtl_config.host == 'null'):
                logger.info('    DTL is globally disabled for vPool {0} with guid {1}'.format(vpool.name, vpool.guid))
                vdisk.storagedriver_client.set_manual_dtl_config(volume_id, None)
                continue
            elif current_dtl_config_mode == DTLConfigMode.MANUAL and (current_dtl_config is None or current_dtl_config.host == 'null'):
                logger.info('    DTL is disabled for virtual disk {0} with guid {1}'.format(vdisk.name, vdisk.guid))
                continue

            storage_router = StorageRouter(vdisk.storagerouter_guid)
            available_storagerouters = []
            # 1. Check available storage routers in the backup failure domain
            if storage_router.secondary_failure_domain is not None:
                for storagerouter in storage_router.secondary_failure_domain.primary_storagerouters:
                    if vpool.guid not in storagerouter.vpools_guids:
                        continue
                    if storagerouter not in root_client_map:
                        try:
                            root_client = SSHClient(storagerouter, username='******')
                        except UnableToConnectException:
                            logger.warning('    Storage Router with IP {0} of vDisk {1} is not reachable'.format(storagerouter.ip, vdisk.name))
                            continue
                        root_client_map[storagerouter] = root_client
                    else:
                        root_client = root_client_map[storagerouter]
                    if ServiceManager.get_service_status('dtl_{0}'.format(vpool.name), client=root_client) is True:
                        available_storagerouters.append(storagerouter)
            # 2. Check available storage routers in the same failure domain as current storage router
            if len(available_storagerouters) == 0:
                for storagerouter in storage_router.primary_failure_domain.primary_storagerouters:
                    if vpool.guid not in storagerouter.vpools_guids or storagerouter == storage_router:
                        continue
                    if storagerouter not in root_client_map:
                        try:
                            root_client = SSHClient(storagerouter, username='******')
                        except UnableToConnectException:
                            logger.warning('    Storage Router with IP {0} of vDisk {1} is not reachable'.format(storagerouter.ip, vdisk.name))
                            continue
                        root_client_map[storagerouter] = root_client
                    else:
                        root_client = root_client_map[storagerouter]
                    if ServiceManager.get_service_status('dtl_{0}'.format(vpool.name), client=root_client) is True:
                        available_storagerouters.append(storagerouter)

            # Remove storage routers to exclude
            for sr_guid in storagerouters_to_exclude:
                sr_to_exclude = StorageRouter(sr_guid)
                if sr_to_exclude in available_storagerouters:
                    available_storagerouters.remove(sr_to_exclude)

            if len(available_storagerouters) == 0:
                logger.info('    No Storage Routers could be found as valid DTL target')
                vdisk.storagedriver_client.set_manual_dtl_config(volume_id, None)
                continue

            # Check whether reconfiguration is required
            reconfigure_required = False
            if current_dtl_config is None:
                logger.info('        No DTL configuration found, but there are Storage Routers available')
                reconfigure_required = True
            elif current_dtl_config_mode == DTLConfigMode.AUTOMATIC:
                logger.info('        DTL configuration set to AUTOMATIC, switching to manual')
                reconfigure_required = True
            else:
                dtl_host = current_dtl_config.host
                dtl_port = current_dtl_config.port
                storage_drivers = [sd for sd in vpool.storagedrivers if sd.storagerouter.ip == dtl_host]

                logger.info('        DTL host: {0}'.format(dtl_host or '-'))
                logger.info('        DTL port: {0}'.format(dtl_port or '-'))
                if dtl_host not in [sr.ip for sr in available_storagerouters]:
                    logger.info('        Host not in available Storage Routers')
                    reconfigure_required = True
                elif dtl_port != storage_drivers[0].ports[2]:
                    logger.info('        Configured port does not match expected port ({0} vs {1})'.format(dtl_port, storage_drivers[0].ports[2]))
                    reconfigure_required = True

            # Perform the reconfiguration
            if reconfigure_required is True:
                logger.info('        Reconfigure required')
                index = random.randint(0, len(available_storagerouters) - 1)
                dtl_target = available_storagerouters[index]
                storage_drivers = [sd for sd in vpool.storagedrivers if sd.storagerouter == dtl_target]
                if len(storage_drivers) == 0:
                    raise ValueError('Could not retrieve related storagedriver')

                port = storage_drivers[0].ports[2]
                vpool_dtl_mode = vpool_config.get('dtl_mode', StorageDriverClient.FRAMEWORK_DTL_ASYNC)
                logger.info('        DTL config that will be set -->  Host: {0}, Port: {1}, Mode: {2}'.format(dtl_target.ip, port, vpool_dtl_mode))
                dtl_config = DTLConfig(str(dtl_target.ip), port, StorageDriverClient.VDISK_DTL_MODE_MAP[vpool_dtl_mode])
                vdisk.storagedriver_client.set_manual_dtl_config(volume_id, dtl_config)
        if errors_found is True:
            logger.error('DTL checkup ended with errors')
            raise Exception('DTL checkup failed with errors. Please check /var/log/ovs/lib.log for more information')
        logger.info('DTL checkup ended')
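
Target selection in the checkup above is a two-tier fallback: prefer Storage Routers in the secondary failure domain, fall back to the primary domain, drop the excluded ones, and pick one of the survivors at random. A simplified sketch of that policy over plain lists (the data model and reachability checks are stubbed out):

    import random

    def pick_dtl_target(secondary_candidates, primary_candidates, excluded):
        """Return a DTL target, preferring the secondary failure domain."""
        available = list(secondary_candidates) or list(primary_candidates)
        available = [sr for sr in available if sr not in excluded]
        if not available:
            return None  # no valid target; the checkup above then clears the DTL config
        return random.choice(available)  # same effect as the randint indexing above

    print pick_dtl_target([], ['sr-a1', 'sr-a2'], excluded=['sr-a1'])  # -> 'sr-a2'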
Example #37
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: a list of error messages
        :rtype: list
        """
        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
                if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and \
                        ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                    port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    ServiceManager.add_service(name='ovs-albaproxy',
                                               params=params,
                                               client=client,
                                               target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service,
                                                 client=client)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

                backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service, client=client)
                ServiceManager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(
                                str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(work_unit=work_unit,
                                                          scratch_dir=scrub_directory,
                                                          log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                          backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(
                                vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(
                                vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info(
                'Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'
                .format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(
                vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service,
                                              client=client):
                    ServiceManager.stop_service(alba_proxy_service,
                                                client=client)
                    ServiceManager.remove_service(alba_proxy_service,
                                                  client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(
                vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
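
The worker above drains its vDisk queue with non-blocking get() calls and treats Queue.Empty as the normal end-of-work signal, logging per-item failures and moving on. A minimal, self-contained sketch of that drain pattern, assuming illustrative names (drain, work_queue, process_item) that do not appear in the code above:

    from Queue import Empty  # Python 2 stdlib, matching these examples

    def drain(work_queue, process_item, logger):
        try:
            while True:
                item = work_queue.get(False)  # Non-blocking; raises Empty once exhausted
                try:
                    process_item(item)
                except Exception:
                    logger.exception('Processing failed for {0}'.format(item))  # Log and continue
        except Empty:
            logger.info('Queue completely processed')  # Normal termination
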
Example #38
0
    def is_host_configured(self, ip):
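        """
        Verify whether the host at the given IP is configured to run the Open vStorage integration
        :param ip: IP address of the host to validate
        :return: True if the host is fully configured, False otherwise
        """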
        if (self._is_devstack is False and self._is_openstack is False) or \
                self._cinder_installed is False or self._nova_installed is False:
            self._logger.warning('Host configured: no OpenStack or DevStack installation detected, or the Cinder and Nova plugins are not installed')
            return False

        # 1. Check driver code
        if self._is_devstack is True:
            if not self.client.file_exists(filename=self._devstack_driver):
                self._logger.info('  File "{0}" does not exist'.format(self._devstack_driver))
                return False
        else:
            if not self.client.file_exists(filename='{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location)):
                self._logger.info('  File "{0}/cinder/volume/drivers/openvstorage.py" does not exist'.format(self._driver_location))
                return False

        # 2. Check configured users
        ovs_id = self.client.run('id -g ovs')  # Group ID of 'ovs', since 'id -a <user>' reports memberships as '<gid>(ovs)'
        if not ovs_id:
            self._logger.info('Failed to determine the OVS group ID')
            return False

        users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users
        for user in users:
            if '{0}(ovs)'.format(ovs_id) not in self.client.run('id -a {0}'.format(user)):
                self._logger.info('User "{0}" is not part of the OVS user group'.format(user))
                return False

        # 3. Check patches
        nova_base_path = self._get_base_path('nova')
        cinder_base_path = self._get_base_path('cinder')
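        # From Liberty onwards the brick initiator is provided by the separate os_brick package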
        if self._stack_version in ('liberty', 'mitaka', 'newton'):
            try:
                import os_brick
                cinder_brick_initiator_file = '{0}/initiator/connector.py'.format(os.path.dirname(os_brick.__file__))
            except ImportError:
                cinder_brick_initiator_file = ''
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format(nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format(self._driver_location)
        else:
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location)
            cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(cinder_base_path)

        if self._is_devstack is True:
            nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path)
        else:
            nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location)

        file_contents = self.client.file_read(nova_volume_file)
        if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents:
            self._logger.info('File "{0}" is not configured properly'.format(nova_volume_file))
            return False

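        # Liberty and Mitaka ship the file volume driver in the nested nova.virt.libvirt.volume.volume module, hence the longer reference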
        if self._stack_version in ('liberty', 'mitaka'):
            check_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver'
        else:
            check_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'

        file_contents = self.client.file_read(nova_driver_file)
        if check_line not in file_contents:
            self._logger.info('File "{0}" is not configured properly'.format(nova_driver_file))
            return False

        if os.path.exists(cinder_brick_initiator_file):
            file_contents = self.client.file_read(cinder_brick_initiator_file)
            if self._stack_version in ('liberty', 'mitaka', 'newton'):
                if 'elif protocol in [LOCAL, "FILE"]:' not in file_contents:
                    self._logger.info('File "{0}" is not configured properly'.format(cinder_brick_initiator_file))
                    return False
            else:
                if 'elif protocol in ["LOCAL", "FILE"]:' not in file_contents:
                    self._logger.info('File "{0}" is not configured properly'.format(cinder_brick_initiator_file))
                    return False

        # 4. Check messaging driver configuration
        nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
        cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'

        host_configured = True
        with remote(ip, [RawConfigParser], 'root') as rem:
            for config_file, driver in {self._NOVA_CONF: nova_messaging_driver,
                                        self._CINDER_CONF: cinder_messaging_driver}.iteritems():
                cfg = rem.RawConfigParser()
                cfg.read([config_file])
                host_configured &= cfg.get('DEFAULT', 'notification_driver') == driver
                host_configured &= 'notifications' in cfg.get('DEFAULT', 'notification_topics')
                if config_file == self._NOVA_CONF:
                    host_configured &= cfg.get('DEFAULT', 'notify_on_any_change') == 'True'
                    host_configured &= cfg.get('DEFAULT', 'notify_on_state_change') == 'vm_and_task_state'

        if host_configured is False:
            self._logger.info('The Nova and/or Cinder configuration files are not set up properly')
            return host_configured

        # 5. Check events consumer service
        service_name = 'ovs-openstack-events-consumer'
        if not (ServiceManager.has_service(service_name, self.client) and
                ServiceManager.get_service_status(service_name, self.client) is True):
            self._logger.info('Service "{0}" is not configured properly'.format(service_name))
            return False

        return True
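
Step 4 above folds several configuration checks into one boolean with the &= operator, so a single misconfigured option flips the result to False. A minimal sketch of that pattern for one file, using an illustrative function name (check_notification_config) and run locally without the remote() wrapper:

    from ConfigParser import RawConfigParser  # Python 2 stdlib, as in the example

    def check_notification_config(config_file, expected_driver):
        cfg = RawConfigParser()
        cfg.read([config_file])  # Missing files are skipped silently
        # As in the example, a missing option raises ConfigParser.NoOptionError
        configured = cfg.get('DEFAULT', 'notification_driver') == expected_driver
        configured &= 'notifications' in cfg.get('DEFAULT', 'notification_topics')
        return configured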