Пример #1
0
    def restart_framework_and_memcache_services(clients, logger, offline_node_ips=None):
        """
        Restart framework and Memcached services
        :param clients: Clients on which to restart these services
        :type clients: dict
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :param offline_node_ips: IP addresses of offline nodes in the cluster
        :type offline_node_ips: list
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        master_ips = [sr.ip for sr in StorageRouterList.get_masters()]
        slave_ips = [sr.ip for sr in StorageRouterList.get_slaves()]
        if offline_node_ips is None:
            offline_node_ips = []
        memcached = 'memcached'
        watcher = 'watcher-framework'
        support_agent = 'support-agent'
        for ip in master_ips + slave_ips:
            if ip not in offline_node_ips:
                if ServiceManager.has_service(watcher, clients[ip]):
                    Toolbox.change_service_state(clients[ip], watcher, 'stop', logger)
        for ip in master_ips:
            if ip not in offline_node_ips:
                Toolbox.change_service_state(clients[ip], memcached, 'restart', logger)
        for ip in master_ips + slave_ips:
            if ip not in offline_node_ips:
                if ServiceManager.has_service(watcher, clients[ip]):
                    Toolbox.change_service_state(clients[ip], watcher, 'start', logger)
                if ServiceManager.has_service(support_agent, clients[ip]):
                    Toolbox.change_service_state(clients[ip], support_agent, 'restart', logger)
        VolatileFactory.store = None
    def _voldrv_arakoon_checkup(create_cluster):
        def add_service(service_storagerouter, arakoon_result):
            """
            Add a service to the storage router
            :param service_storagerouter: Storage Router to add the service to
            :type service_storagerouter:  StorageRouter

            :param arakoon_result:        Port information
            :type arakoon_result:         Dictionary

            :return:                      The newly created and added service
            """
            new_service = Service()
            new_service.name = service_name
            new_service.type = service_type
            new_service.ports = [arakoon_result['client_port'], arakoon_result['messaging_port']]
            new_service.storagerouter = service_storagerouter
            new_service.save()
            return new_service

        cluster_name = 'voldrv'
        service_name = 'arakoon-voldrv'
        service_type = ServiceTypeList.get_by_name('Arakoon')
        current_services = []
        current_ips = []
        for service in service_type.services:
            if service.name == service_name:
                current_services.append(service)
                current_ips.append(service.storagerouter.ip)
        all_sr_ips = [storagerouter.ip for storagerouter in StorageRouterList.get_slaves()]
        available_storagerouters = {}
        for storagerouter in StorageRouterList.get_masters():
            storagerouter.invalidate_dynamics(['partition_config'])
            if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
                available_storagerouters[storagerouter] = DiskPartition(storagerouter.partition_config[DiskPartition.ROLES.DB][0])
            all_sr_ips.append(storagerouter.ip)
        if create_cluster is True and len(current_services) == 0 and len(available_storagerouters) > 0:
            storagerouter, partition = available_storagerouters.items()[0]
            result = ArakoonInstaller.create_cluster(cluster_name=cluster_name,
                                                     ip=storagerouter.ip,
                                                     base_dir=partition.folder)
            current_services.append(add_service(storagerouter, result))
            ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, storagerouter.ip)
            current_ips.append(storagerouter.ip)
            StorageDriverController._configure_arakoon_to_volumedriver()

        if 0 < len(current_services) < len(available_storagerouters):
            for storagerouter, partition in available_storagerouters.iteritems():
                if storagerouter.ip in current_ips:
                    continue
                result = ArakoonInstaller.extend_cluster(
                    current_services[0].storagerouter.ip,
                    storagerouter.ip,
                    cluster_name,
                    partition.folder
                )
                add_service(storagerouter, result)
                current_ips.append(storagerouter.ip)
                ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, storagerouter.ip)
            StorageDriverController._configure_arakoon_to_volumedriver()
Пример #3
0
    def _dtl_status(self):
        """
        Retrieve the DTL status for a vDisk
        """
        sd_status = self.info.get('failover_mode', 'UNKNOWN').lower()
        if sd_status == '':
            sd_status = 'unknown'
        if sd_status != 'ok_standalone':
            return sd_status

        # Verify whether 'ok_standalone' is the correct status for this vDisk
        vpool_dtl = self.vpool.configuration['dtl_enabled']
        if self.has_manual_dtl is True or vpool_dtl is False:
            return sd_status

        domains = []
        possible_dtl_targets = set()
        for sr in StorageRouterList.get_storagerouters():
            if sr.guid == self.storagerouter_guid:
                domains = [junction.domain for junction in sr.domains]
            elif len(sr.storagedrivers) > 0:
                possible_dtl_targets.add(sr)

        if len(domains) > 0:
            possible_dtl_targets = set()
            for domain in domains:
                possible_dtl_targets.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))

        if len(possible_dtl_targets) == 0:
            return sd_status
        return 'checkup_required'
Пример #4
0
    def _dtl_status(self):
        """
        Retrieve the DTL status for a vDisk
        """
        sd_status = self.info.get('failover_mode', 'UNKNOWN').lower()
        if sd_status == '':
            sd_status = 'unknown'
        if sd_status != 'ok_standalone':
            return sd_status

        # Verify whether 'ok_standalone' is the correct status for this vDisk
        vpool_dtl = self.vpool.configuration['dtl_enabled']
        if self.has_manual_dtl is True or vpool_dtl is False:
            return sd_status

        domains = []
        possible_dtl_targets = set()
        for sr in StorageRouterList.get_storagerouters():
            if sr.guid == self.storagerouter_guid:
                domains = [junction.domain for junction in sr.domains]
            elif len(sr.storagedrivers) > 0:
                possible_dtl_targets.add(sr)

        if len(domains) > 0:
            possible_dtl_targets = set()
            for domain in domains:
                possible_dtl_targets.update(
                    StorageRouterList.get_primary_storagerouters_for_domain(
                        domain))

        if len(possible_dtl_targets) == 0:
            return sd_status
        return 'checkup_required'
Пример #5
0
    def install_plugins():
        """
        (Re)load plugins
        """
        manager = ServiceFactory.get_manager()
        if manager.has_service('ovs-watcher-framework',
                               SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            print 'Installing plugin into Open vStorage'
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = {}
            masters = StorageRouterList.get_masters()
            slaves = StorageRouterList.get_slaves()
            try:
                for sr in masters + slaves:
                    clients[sr] = SSHClient(sr, username='******')
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')
            memcached = 'memcached'
            watcher = 'watcher-framework'
            for sr in masters + slaves:
                if manager.has_service(watcher, clients[sr]):
                    print '- Stopping watcher on {0} ({1})'.format(
                        sr.name, sr.ip)
                    manager.stop_service(watcher, clients[sr])
            for sr in masters:
                print '- Restarting memcached on {0} ({1})'.format(
                    sr.name, sr.ip)
                manager.restart_service(memcached, clients[sr])
            for sr in masters + slaves:
                if manager.has_service(watcher, clients[sr]):
                    print '- Starting watcher on {0} ({1})'.format(
                        sr.name, sr.ip)
                    manager.start_service(watcher, clients[sr])

            print '- Execute model migrations'
            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            if len(functions) > 0:
                print '- Execute post installation scripts'
            for fct in functions:
                fct(ip=ip)
            print 'Installing plugin into Open vStorage: Completed'
Пример #6
0
 def process_response(self, request, response):
     """
     Processes responses
     """
     _ = self
     # Timings
     if isinstance(response, OVSResponse):
         if hasattr(request, '_entry_time'):
             # noinspection PyProtectedMember
             response.timings['total'] = [
                 time.time() - request._entry_time, 'Total'
             ]
         response.build_timings()
     # Process CORS responses
     if 'HTTP_ORIGIN' in request.META:
         path = request.path
         storagerouters = StorageRouterList.get_storagerouters()
         allowed_origins = [
             'https://{0}'.format(storagerouter.ip)
             for storagerouter in storagerouters
         ]
         if request.META[
                 'HTTP_ORIGIN'] in allowed_origins or '/swagger.json' in path:
             response['Access-Control-Allow-Origin'] = request.META[
                 'HTTP_ORIGIN']
             response[
                 'Access-Control-Allow-Headers'] = 'x-requested-with, content-type, accept, origin, authorization'
             response[
                 'Access-Control-Allow-Methods'] = 'GET, POST, PUT, PATCH, DELETE, OPTIONS'
     return response
 def get_available_arakoon_storagerouters(cls, ssh_clients=None):
     # type: (Optional[Dict[StorageRouter, SSHClient]]) -> Dict[StorageRouter, DiskPartition]
     """
     Retrieves all Storagerouters which are suitable to deploy Arakoons on
     :return: Set of all Storagerouters that are suitable
     :rtype: Dict[StorageRouter, DiskPartition]
     """
     ssh_clients = ssh_clients or {}
     available_storagerouters = {}
     masters = StorageRouterList.get_masters()
     for storagerouter in masters:
         try:
             partition = AlbaArakoonInstaller.get_db_partition(
                 storagerouter)
             try:
                 if ssh_clients:
                     client = ssh_clients.get(storagerouter)
                 else:
                     client = SSHClient(storagerouter)
                 if client:
                     available_storagerouters[storagerouter] = partition
             except UnableToConnectException:
                 cls._logger.warning(
                     'Storage Router with IP {0} is not reachable'.format(
                         storagerouter.ip))
         except ValueError:
             pass  # Ignore storagerouters without DB parition
     return available_storagerouters
    def register(node_id):
        """
        Adds a Node with a given node_id to the model
        :param node_id: ID of the ALBA node
        :type node_id: str

        :return: None
        """
        node = AlbaNodeList.get_albanode_by_node_id(node_id)
        if node is None:
            main_config = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
            node = AlbaNode()
            node.ip = main_config['ip']
            node.port = main_config['port']
            node.username = main_config['username']
            node.password = main_config['password']
            node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
        data = node.client.get_metadata()
        if data['_success'] is False and data['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        if data['node_id'] != node_id:
            AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
            raise RuntimeError('Unexpected node identifier')
        node.node_id = node_id
        node.type = 'ASD'
        node.save()

        # increase maintenance agents count for all nodes by 1
        for backend in AlbaBackendList.get_albabackends():
            nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(backend.guid)
            if EtcdConfiguration.exists(nr_of_agents_key):
                EtcdConfiguration.set(nr_of_agents_key, int(EtcdConfiguration.get(nr_of_agents_key) + 1))
            else:
                EtcdConfiguration.set(nr_of_agents_key, 1)
        AlbaNodeController.checkup_maintenance_agents()
Пример #9
0
    def pulse():
        """
        Update the heartbeats for the Current Routers
        :return: None
        """
        logger = LogHandler.get('extensions', name='heartbeat')
        machine_id = System.get_my_machine_id()
        current_time = int(time.time())

        routers = StorageRouterList.get_storagerouters()
        for node in routers:
            if node.machine_id == machine_id:
                with volatile_mutex('storagerouter_heartbeat_{0}'.format(node.guid)):
                    node_save = StorageRouter(node.guid)
                    node_save.heartbeats['process'] = current_time
                    node_save.save()
                StorageRouterController.ping.s(node.guid, current_time).apply_async(routing_key='sr.{0}'.format(machine_id))
            else:
                try:
                    # check timeout of other nodes and clear arp cache
                    if node.heartbeats and 'process' in node.heartbeats:
                        if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                            check_output("/usr/sbin/arp -d '{0}'".format(node.name.replace(r"'", r"'\''")), shell=True)
                except CalledProcessError:
                    logger.exception('Error clearing ARP cache')
Пример #10
0
 def remove(license_guid):
     """
     Removes a license
     """
     clients = {}
     storagerouters = StorageRouterList.get_storagerouters()
     try:
         for storagerouter in storagerouters:
             clients[storagerouter] = SSHClient(storagerouter.ip)
     except UnableToConnectException:
         raise RuntimeError('Not all StorageRouters are reachable')
     lic = License(license_guid)
     if lic.can_remove is True:
         remove_functions = Toolbox.fetch_hooks('license', '{0}.remove'.format(lic.component))
         result = remove_functions[0](component=lic.component, data=lic.data, valid_until=lic.valid_until, signature=lic.signature)
         if result is True:
             lic.delete()
             license_contents = []
             for lic in LicenseList.get_licenses():
                 license_contents.append(lic.hash)
             for storagerouter in storagerouters:
                 client = clients[storagerouter]
                 client.file_write('/opt/OpenvStorage/config/licenses', '{0}\n'.format('\n'.join(license_contents)))
         return result
     return None
Пример #11
0
 def model_albanodes(**kwargs):
     """
     Add all ALBA nodes known to the config platform to the model
     :param kwargs: Kwargs containing information regarding the node
     :type kwargs: dict
     :return: None
     :rtype: NoneType
     """
     _ = kwargs
     if Configuration.dir_exists('/ovs/alba/asdnodes'):
         for node_id in Configuration.list('/ovs/alba/asdnodes'):
             node = AlbaNodeList.get_albanode_by_node_id(node_id)
             if node is None:
                 node = AlbaNode()
             main_config = Configuration.get(
                 '/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
             node.type = 'ASD'
             node.node_id = node_id
             node.ip = main_config['ip']
             node.port = main_config['port']
             node.username = main_config['username']
             node.password = main_config['password']
             node.storagerouter = StorageRouterList.get_by_ip(
                 main_config['ip'])
             node.save()
Пример #12
0
 def _configure_arakoon_to_volumedriver():
     print 'Update existing vPools'
     logger.info('Update existing vPools')
     for storagerouter in StorageRouterList.get_storagerouters():
         with Remote(storagerouter.ip, [os, RawConfigParser, Configuration, StorageDriverConfiguration, ArakoonManagementEx], 'ovs') as remote:
             arakoon_cluster_config = remote.ArakoonManagementEx().getCluster('voldrv').getClientConfig()
             arakoon_nodes = []
             for node_id, node_config in arakoon_cluster_config.iteritems():
                 arakoon_nodes.append({'host': node_config[0][0],
                                       'port': node_config[1],
                                       'node_id': node_id})
             configuration_dir = '{0}/storagedriver/storagedriver'.format(
                 remote.Configuration.get('ovs.core.cfgdir'))
             if not remote.os.path.exists(configuration_dir):
                 remote.os.makedirs(configuration_dir)
             for json_file in remote.os.listdir(configuration_dir):
                 vpool_name = json_file.replace('.json', '')
                 if json_file.endswith('.json'):
                     if remote.os.path.exists('{0}/{1}.cfg'.format(configuration_dir, vpool_name)):
                         continue  # There's also a .cfg file, so this is an alba_proxy configuration file
                     storagedriver_config = remote.StorageDriverConfiguration('storagedriver', vpool_name)
                     storagedriver_config.load()
                     storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id='voldrv',
                                                                    vregistry_arakoon_cluster_nodes=arakoon_nodes)
                     storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                           dls_arakoon_cluster_id='voldrv',
                                                                           dls_arakoon_cluster_nodes=arakoon_nodes)
                     storagedriver_config.save()
Пример #13
0
 def on_demote(cluster_ip, master_ip):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :param master_ip: IP of the master node
     """
     client = SSHClient(cluster_ip, username='******')
     servicetype = ServiceTypeList.get_by_name('Arakoon')
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv':
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             else:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         print '* Shrink StorageDriver cluster'
         ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv')
         if ServiceManager.has_service(current_service.name, client=client) is True:
             ServiceManager.stop_service(current_service.name, client=client)
             ServiceManager.remove_service(current_service.name, client=client)
         ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
         current_service.delete()
         for storagerouter in StorageRouterList.get_storagerouters():
             ArakoonInstaller.deploy_to_slave(master_ip, storagerouter.ip, 'voldrv')
         StorageDriverController._configure_arakoon_to_volumedriver()
Пример #14
0
 def register(node_id):
     """
     Adds a Node with a given node_id to the model
     :param node_id: ID of the ALBA node
     :type node_id: str
     :return: None
     """
     node = AlbaNodeList.get_albanode_by_node_id(node_id)
     if node is None:
         main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
         node = AlbaNode()
         node.ip = main_config['ip']
         node.port = main_config['port']
         node.username = main_config['username']
         node.password = main_config['password']
         node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
     data = node.client.get_metadata()
     if data['_success'] is False and data['_error'] == 'Invalid credentials':
         raise RuntimeError('Invalid credentials')
     if data['node_id'] != node_id:
         AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
         raise RuntimeError('Unexpected node identifier')
     node.node_id = node_id
     node.type = 'ASD'
     node.save()
     AlbaController.checkup_maintenance_agents.delay()
Пример #15
0
 def model_alba_node(node_id, node_type, ip=None):
     # type: (str, str, Optional[str]) -> AlbaNode
     """
     Models a non-existing AlbaNode
     :param node_id: ID of the node
     :type node_id: str
     :param node_type: Type of the node
     :type node_type: str
     :param ip: IP of the node
     :type ip: str
     :return: The modeled node
     :rtype: AlbaNode
     """
     node = AlbaNode()
     node.type = node_type
     node.node_id = node_id
     config_path = AlbaNode.CONFIG_LOCATIONS[node_type].format(
         node_id)  # type str
     node.ip = ip or Configuration.get(os.path.join(config_path, 'main|ip'))
     node.port = Configuration.get(os.path.join(config_path, 'main|port'))
     node.username = Configuration.get(
         os.path.join(config_path, 'main|username'))
     node.password = Configuration.get(
         os.path.join(config_path, 'main|password'))
     node.storagerouter = StorageRouterList.get_by_ip(node.ip)
     return node
Пример #16
0
    def pulse():
        """
        Update the heartbeats for all Storage Routers
        :return: None
        """
        logger = LogHandler.get('extensions', name='heartbeat')

        current_time = int(time.time())
        machine_id = System.get_my_machine_id()
        amqp = '{0}://{1}:{2}@{3}//'.format(EtcdConfiguration.get('/ovs/framework/messagequeue|protocol'),
                                            EtcdConfiguration.get('/ovs/framework/messagequeue|user'),
                                            EtcdConfiguration.get('/ovs/framework/messagequeue|password'),
                                            EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id)))

        celery_path = OSManager.get_path('celery')
        worker_states = check_output("{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp), shell=True)
        routers = StorageRouterList.get_storagerouters()
        for node in routers:
            if node.heartbeats is None:
                node.heartbeats = {}
            if 'celery@{0}: OK'.format(node.name) in worker_states:
                node.heartbeats['celery'] = current_time
            if node.machine_id == machine_id:
                node.heartbeats['process'] = current_time
            else:
                try:
                    # check timeout of other nodes and clear arp cache
                    if node.heartbeats and 'process' in node.heartbeats:
                        if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                            check_output("/usr/sbin/arp -d {0}".format(node.name), shell=True)
                except CalledProcessError:
                    logger.exception('Error clearing ARP cache')
            node.save()
Пример #17
0
    def get_stats_storagerouters(cls):
        """
        Retrieve amount of vDisks and some read/write statistics for all StorageRouters
        """
        if cls._config is None:
            cls.validate_and_retrieve_config()

        stats = []
        errors = False
        environment = cls._config['environment']
        for storagerouter in StorageRouterList.get_storagerouters():
            if len(storagerouter.storagedrivers) == 0:
                cls._logger.debug('StorageRouter {0} does not have any StorageDrivers linked to it, skipping'.format(storagerouter.name))
                continue
            try:
                statistics = storagerouter.statistics
                stats.append({'tags': {'environment': environment,
                                       'storagerouter_name': storagerouter.name},
                              'fields': {'read_byte': statistics['data_read'],
                                         'write_byte': statistics['data_written'],
                                         'operations': statistics['4k_operations'],
                                         'amount_vdisks': len(storagerouter.vdisks_guids),
                                         'read_operations': statistics['4k_read_operations'],
                                         'write_operations': statistics['4k_write_operations']},
                              'measurement': 'storagerouter'})
            except Exception:
                errors = True
                cls._logger.exception('Retrieving statistics for StorageRouter {0} failed'.format(storagerouter.name))
        return errors, stats
Пример #18
0
    def set_config_params(self, vdisk, new_config_params, version):
        """
        Sets configuration parameters to a given vdisk.
        :param vdisk: Guid of the virtual disk to configure
        :type vdisk: VDisk
        :param new_config_params: Configuration settings for the virtual disk
        :type new_config_params: dict
        :param version: Client version
        :type version: int
        """
        if version == 1 and 'dtl_target' in new_config_params:
            storage_router = StorageRouterList.get_by_ip(
                new_config_params['dtl_target'])
            if storage_router is None:
                raise HttpNotAcceptableException(
                    error_description=
                    'API version 1 requires a Storage Router IP',
                    error='invalid_version')
            new_config_params['dtl_target'] = [
                junction.domain_guid for junction in storage_router.domains
            ]

        new_config_params.pop('dedupe_mode', None)
        new_config_params.pop('cache_strategy', None)
        new_config_params.pop('readcache_limit', None)
        return VDiskController.set_config_params.delay(
            vdisk_guid=vdisk.guid, new_config_params=new_config_params)
Пример #19
0
 def remove(license_guid):
     """
     Removes a license
     """
     clients = {}
     storagerouters = StorageRouterList.get_storagerouters()
     try:
         for storagerouter in storagerouters:
             clients[storagerouter] = SSHClient(storagerouter.ip)
     except UnableToConnectException:
         raise RuntimeError('Not all StorageRouters are reachable')
     lic = License(license_guid)
     if lic.can_remove is True:
         remove_functions = Toolbox.fetch_hooks(
             'license', '{0}.remove'.format(lic.component))
         result = remove_functions[0](component=lic.component,
                                      data=lic.data,
                                      valid_until=lic.valid_until,
                                      signature=lic.signature)
         if result is True:
             lic.delete()
             license_contents = []
             for lic in LicenseList.get_licenses():
                 license_contents.append(lic.hash)
             for storagerouter in storagerouters:
                 client = clients[storagerouter]
                 client.file_write(
                     '/opt/OpenvStorage/config/licenses',
                     '{0}\n'.format('\n'.join(license_contents)))
         return result
     return None
 def get_storage_router_by_ip(ip):
     """
     Retrieve Storage Router based on IP
     :param ip: IP of Storage Router
     :return: Storage Router DAL object
     """
     return StorageRouterList.get_by_ip(ip)
Пример #21
0
def getVPoolByIPandPort(ip, port):
    for storagerouter in StorageRouterList.get_storagerouters():
        for storagedriver in storagerouter.storagedrivers:
            if storagedriver.ports[
                    "edge"] == port and storagedriver.storage_ip == ip:
                return storagedriver.vpool
    raise RuntimeError("Could not find vpool for {}:{}".format(ip, port))
Пример #22
0
    def _run_and_validate_dtl_checkup(self, vdisk, validations):
        """
        Execute the DTL checkup for a vDisk and validate the settings afterwards
        """
        single_node = len(StorageRouterList.get_storagerouters()) == 1
        VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)
        config = vdisk.storagedriver_client.get_dtl_config(vdisk.volume_id)
        config_mode = vdisk.storagedriver_client.get_dtl_config_mode(vdisk.volume_id)
        msg = '{0} node - {{0}} - Actual: {{1}} - Expected: {{2}}'.format('Single' if single_node is True else 'Multi')

        validations.append({'key': 'config_mode', 'value': DTLConfigMode.MANUAL})
        for validation in validations:
            key = validation['key']
            value = validation['value']
            if key == 'config':
                actual_value = config
            elif key == 'host':
                actual_value = config.host
            elif key == 'port':
                actual_value = config.port
            elif key == 'mode':
                actual_value = config.mode
            else:
                actual_value = config_mode

            if isinstance(value, list):
                self.assertTrue(expr=actual_value in value,
                                msg=msg.format(key.capitalize(), actual_value, ', '.join(value)))
            else:
                self.assertEqual(first=actual_value,
                                 second=value,
                                 msg=msg.format(key.capitalize(), actual_value, value))
        return config
 def _configure_arakoon_to_volumedriver(offline_node_ips=None):
     print 'Update existing vPools'
     logger.info('Update existing vPools')
     if offline_node_ips is None:
         offline_node_ips = []
     for storagerouter in StorageRouterList.get_storagerouters():
         config = ArakoonClusterConfig('voldrv')
         config.load_config()
         arakoon_nodes = []
         for node in config.nodes:
             arakoon_nodes.append({'host': node.ip,
                                   'port': node.client_port,
                                   'node_id': node.name})
         with Remote(storagerouter.ip, [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration], 'ovs') as remote:
             configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
             if not remote.os.path.exists(configuration_dir):
                 remote.os.makedirs(configuration_dir)
             for json_file in remote.os.listdir(configuration_dir):
                 vpool_name = json_file.replace('.json', '')
                 if json_file.endswith('.json'):
                     if remote.os.path.exists('{0}/{1}.cfg'.format(configuration_dir, vpool_name)):
                         continue  # There's also a .cfg file, so this is an alba_proxy configuration file
                     storagedriver_config = remote.StorageDriverConfiguration('storagedriver', vpool_name)
                     storagedriver_config.load()
                     storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id='voldrv',
                                                                    vregistry_arakoon_cluster_nodes=arakoon_nodes)
                     storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                           dls_arakoon_cluster_id='voldrv',
                                                                           dls_arakoon_cluster_nodes=arakoon_nodes)
                     storagedriver_config.save(reload_config=True)
Пример #24
0
    def pulse():
        """
        Update the heartbeats for the Current Routers
        :return: None
        """
        logger = Logger('extensions-generic')
        machine_id = System.get_my_machine_id()
        current_time = int(time.time())

        routers = StorageRouterList.get_storagerouters()
        for node in routers:
            if node.machine_id == machine_id:
                with volatile_mutex('storagerouter_heartbeat_{0}'.format(
                        node.guid)):
                    node_save = StorageRouter(node.guid)
                    node_save.heartbeats['process'] = current_time
                    node_save.save()
                StorageRouterController.ping.s(
                    node.guid, current_time).apply_async(
                        routing_key='sr.{0}'.format(machine_id))
            else:
                try:
                    # check timeout of other nodes and clear arp cache
                    if node.heartbeats and 'process' in node.heartbeats:
                        if current_time - node.heartbeats[
                                'process'] >= HeartBeat.ARP_TIMEOUT:
                            check_output("/usr/sbin/arp -d '{0}'".format(
                                node.name.replace(r"'", r"'\''")),
                                         shell=True)
                except CalledProcessError:
                    logger.exception('Error clearing ARP cache')
Пример #25
0
 def _configure_arakoon_to_volumedriver():
     print "Update existing vPools"
     logger.info("Update existing vPools")
     for storagerouter in StorageRouterList.get_storagerouters():
         config = ArakoonClusterConfig("voldrv")
         config.load_config()
         arakoon_nodes = []
         for node in config.nodes:
             arakoon_nodes.append({"host": node.ip, "port": node.client_port, "node_id": node.name})
         with Remote(
             storagerouter.ip, [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration], "ovs"
         ) as remote:
             configuration_dir = "{0}/storagedriver/storagedriver".format(
                 EtcdConfiguration.get("/ovs/framework/paths|cfgdir")
             )
             if not remote.os.path.exists(configuration_dir):
                 remote.os.makedirs(configuration_dir)
             for json_file in remote.os.listdir(configuration_dir):
                 vpool_name = json_file.replace(".json", "")
                 if json_file.endswith(".json"):
                     if remote.os.path.exists("{0}/{1}.cfg".format(configuration_dir, vpool_name)):
                         continue  # There's also a .cfg file, so this is an alba_proxy configuration file
                     storagedriver_config = remote.StorageDriverConfiguration("storagedriver", vpool_name)
                     storagedriver_config.load()
                     storagedriver_config.configure_volume_registry(
                         vregistry_arakoon_cluster_id="voldrv", vregistry_arakoon_cluster_nodes=arakoon_nodes
                     )
                     storagedriver_config.configure_distributed_lock_store(
                         dls_type="Arakoon", dls_arakoon_cluster_id="voldrv", dls_arakoon_cluster_nodes=arakoon_nodes
                     )
                     storagedriver_config.save(reload_config=True)
Пример #26
0
 def test_ssh_connectivity():
     """
     Validates whether all nodes can SSH into eachother
     """
     MonitoringController._logger.info('Starting SSH connectivity test...')
     ips = [sr.ip for sr in StorageRouterList.get_storagerouters()]
     for ip in ips:
         for primary_username in ['root', 'ovs']:
             try:
                 with remote(ip, [SSHClient],
                             username=primary_username) as rem:
                     for local_ip in ips:
                         for username in ['root', 'ovs']:
                             message = '* Connection from {0}@{1} to {2}@{3}... {{0}}'.format(
                                 primary_username, ip, username, local_ip)
                             try:
                                 c = rem.SSHClient(local_ip,
                                                   username=username)
                                 assert c.run(['whoami'
                                               ]).strip() == username
                                 message = message.format('OK')
                                 logger = MonitoringController._logger.info
                             except Exception as ex:
                                 message = message.format(ex.message)
                                 logger = MonitoringController._logger.error
                             logger(message)
             except Exception as ex:
                 MonitoringController._logger.error(
                     '* Could not connect to {0}@{1}: {2}'.format(
                         primary_username, ip, ex.message))
     MonitoringController._logger.info('Finished')
Пример #27
0
    def _run_and_validate_dtl_checkup(self, vdisk, validations):
        """
        Execute the DTL checkup for a vDisk and validate the settings afterwards
        """
        single_node = len(StorageRouterList.get_storagerouters()) == 1
        VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)
        config = vdisk.storagedriver_client.get_dtl_config(vdisk.volume_id)
        config_mode = vdisk.storagedriver_client.get_dtl_config_mode(vdisk.volume_id)
        msg = '{0} node - {{0}} - Actual: {{1}} - Expected: {{2}}'.format('Single' if single_node is True else 'Multi')

        validations.append({'key': 'config_mode', 'value': DTLConfigMode.MANUAL})
        for validation in validations:
            key = validation['key']
            value = validation['value']
            if key == 'config':
                actual_value = config
            elif key == 'host':
                actual_value = config.host
            elif key == 'port':
                actual_value = config.port
            elif key == 'mode':
                actual_value = config.mode
            else:
                actual_value = config_mode

            if isinstance(value, list):
                self.assertTrue(expr=actual_value in value,
                                msg=msg.format(key.capitalize(), actual_value, ', '.join(value)))
            else:
                self.assertEqual(first=actual_value,
                                 second=value,
                                 msg=msg.format(key.capitalize(), actual_value, value))
        return config
Пример #28
0
 def print_current_mds_layout():
     """
     Prints the current MDS layout
     """
     output = ['',
               'Open vStorage - MDS debug information',
               '=====================================',
               'timestamp: {0}'.format(time.time()),
               '']
     for storagerouter in StorageRouterList.get_storagerouters():
         output.append('+ {0} ({1})'.format(storagerouter.name, storagerouter.ip))
         vpools = set(sd.vpool for sd in storagerouter.storagedrivers)
         for vpool in vpools:
             output.append('  + {0}'.format(vpool.name))
             for mds_service in vpool.mds_services:
                 if mds_service.service.storagerouter_guid == storagerouter.guid:
                     masters, slaves = 0, 0
                     for junction in mds_service.vdisks:
                         if junction.is_master:
                             masters += 1
                         else:
                             slaves += 1
                     capacity = mds_service.capacity
                     if capacity == -1:
                         capacity = 'infinite'
                     load, _ = MDSServiceController.get_mds_load(mds_service)
                     if load == float('inf'):
                         load = 'infinite'
                     else:
                         load = '{0}%'.format(round(load, 2))
                     output.append('    + {0} - port {1} - {2} master(s), {3} slave(s) - capacity: {4}, load: {5}'.format(
                         mds_service.number, mds_service.service.ports[0], masters, slaves, capacity, load
                     ))
     print '\n'.join(output)
Пример #29
0
 def list(self):
     """
     Overview of all StorageRouters
     :return: List of StorageRouters
     :rtype: list[ovs.dal.hybrids.storagerouter.StorageRouter]
     """
     return StorageRouterList.get_storagerouters()
Пример #30
0
    def build_new_kwargs(original_function, request, instance, version, raw_version, passed_kwargs):
        # type: (callable, Union[WSGIRequest, Request], DataObject, int, str, **any) -> Tuple[dict, dict]
        """
        Convert all positional arguments to keyword arguments
        :param original_function: The orignally decorated function
        :type original_function: callable
        :param request: API request object
        :type request: Union[WSGIRequest, Request]
        :param instance: The data object instance to inject
        :type instance: DataObject
        :param version: Parsed API version
        :type version: int
        :param raw_version: Unparsed API version
        :type raw_version: str
        :param passed_kwargs: Kwargs passed to the original function
        :type passed_kwargs: dict
        :return: The kwargs for the original function and the kwargs for the validator
        :rtype: Tuple[dict, dict]
        """
        function_metadata = original_function.ovs_metadata
        kwargs = {}
        validator_kwargs = {}
        empty = object()
        # Special reserved keywords
        reserved = {'version': version,
                    'raw_version': raw_version,
                    'request': request,
                    'local_storagerouter': StorageRouterList.get_by_machine_id(settings.UNIQUE_ID)}
        if instance is not None:
            reserved[object_type.__name__.lower()] = instance

        for mandatory_vars, optional_vars, new_kwargs in [(function_metadata['load']['mandatory'][:], function_metadata['load']['optional'][:], kwargs),
                                                          (validation_mandatory_vars[:], validation_optional_vars[:], validator_kwargs)]:
            for keyword, value in reserved.iteritems():
                if keyword in mandatory_vars:
                    new_kwargs[keyword] = value
                    mandatory_vars.remove(keyword)

            # The rest of the parameters
            post_data = request.DATA if hasattr(request, 'DATA') else request.POST
            query_params = request.QUERY_PARAMS if hasattr(request, 'QUERY_PARAMS') else request.GET
            # Used to detect if anything was passed. Can't use None as the value passed might be None
            data_containers = [passed_kwargs, post_data, query_params]
            for parameters, mandatory in ((mandatory_vars, True), (optional_vars, False)):
                for name in parameters:
                    val = empty
                    for container in data_containers:
                        val = container.get(name, empty)
                        if val != empty:
                            break
                    if val != empty:
                        # Embrace our design flaw. The query shouldn't be json dumped separately.
                        if name == 'query':
                            val = _try_parse(val)
                        new_kwargs[name] = _try_convert_bool(val)
                    elif mandatory:
                        raise HttpNotAcceptableException(error_description='Invalid data passed: {0} is missing'.format(name),
                                                         error='invalid_data')
        return kwargs, validator_kwargs
Пример #31
0
 def get_storagerouter_by_ip(storagerouter_ip):
     """
     :param storagerouter_ip: ip of a storagerouter
     :type storagerouter_ip: str
     :return: storagerouter
     :rtype: ovs.dal.hybrids.storagerouter.StorageRouter
     """
     return StorageRouterList.get_by_ip(storagerouter_ip)
Пример #32
0
    def get_storagerouters():
        """
        Fetch the storagerouters

        :return: list with storagerouters
        :rtype: list
        """
        return StorageRouterList.get_storagerouters()
    def get_by_machine_id(machine_id):
        """
        Fetch a dal machine by id

        :param machine_id: id of the machine
        :return:
        """

        return StorageRouterList.get_by_machine_id(machine_id)
Пример #34
0
 def get_available_actions(self):
     """
     Gets a list of all available actions
     """
     actions = []
     storagerouters = StorageRouterList.get_storagerouters()
     if len(storagerouters) > 1:
         actions.append('MOVE_AWAY')
     return Response(actions, status=status.HTTP_200_OK)
Пример #35
0
 def get_available_actions(self):
     """
     Gets a list of all available actions
     """
     actions = []
     storagerouters = StorageRouterList.get_storagerouters()
     if len(storagerouters) > 1:
         actions.append('MOVE_AWAY')
     return actions
Пример #36
0
 def get_available_actions(self):
     """
     Gets a list of all available actions
     """
     actions = []
     storagerouters = StorageRouterList.get_storagerouters()
     if len(storagerouters) > 1:
         actions.append('MOVE_AWAY')
     return actions
Пример #37
0
 def list(self, query=None):
     """
     Overview of all Storage Routers
     """
     if query is None:
         return StorageRouterList.get_storagerouters()
     else:
         query = json.loads(query)
         return DataList(StorageRouter, query)
Пример #38
0
 def get_available_actions(self):
     """
     Gets a list of all available actions
     """
     actions = []
     storagerouters = StorageRouterList.get_storagerouters()
     if len(storagerouters) > 1:
         actions.append('MOVE_AWAY')
     return Response(actions, status=status.HTTP_200_OK)
Пример #39
0
 def list(self, query=None):
     """
     Overview of all Storage Routers
     """
     if query is None:
         return StorageRouterList.get_storagerouters()
     else:
         query = json.loads(query)
         return DataList(StorageRouter, query)
Пример #40
0
 def apply(license_string):
     """
     Applies a license. It will apply as much licenses as possible, however, it won't fail on invalid licenses as it
     will simply skip them.
     """
     try:
         clients = {}
         storagerouters = StorageRouterList.get_storagerouters()
         try:
             for storagerouter in storagerouters:
                 clients[storagerouter] = SSHClient(storagerouter.ip)
         except UnableToConnectException:
             raise RuntimeError('Not all StorageRouters are reachable')
         data = LicenseController._decode(license_string)
         for component in data:
             cdata = data[component]
             name = cdata['name']
             data = cdata['data']
             token = cdata['token']
             valid_until = float(
                 cdata['valid_until']) if 'valid_until' in cdata else None
             if valid_until is not None and valid_until <= time.time():
                 continue
             signature = cdata['signature'] if 'signature' in cdata else None
             validate_functions = Toolbox.fetch_hooks(
                 'license', '{0}.validate'.format(component))
             apply_functions = Toolbox.fetch_hooks(
                 'license', '{0}.apply'.format(component))
             if len(validate_functions) == 1 and len(apply_functions) == 1:
                 valid, metadata = validate_functions[0](
                     component=component, data=data, signature=signature)
                 if valid is True:
                     success = apply_functions[0](component=component,
                                                  data=data,
                                                  signature=signature)
                     if success is True:
                         license_object = LicenseList.get_by_component(
                             component)
                         if license_object is None:
                             license_object = License()
                         license_object.component = component
                         license_object.name = name
                         license_object.token = token
                         license_object.data = data
                         license_object.valid_until = valid_until
                         license_object.signature = signature
                         license_object.save()
         license_contents = []
         for lic in LicenseList.get_licenses():
             license_contents.append(lic.hash)
         for storagerouter in storagerouters:
             client = clients[storagerouter]
             client.file_write('/opt/OpenvStorage/config/licenses',
                               '{0}\n'.format('\n'.join(license_contents)))
     except Exception, ex:
         logger.exception('Error applying license: {0}'.format(ex))
         return None
Пример #41
0
    def get_mds_storagedriver_config_set(vpool, check_online=False):
        """
        Builds a configuration for all StorageRouters from a given VPool with following goals:
        * Primary MDS is the local one
        * All slaves are on different hosts
        * Maximum `mds_safety` nodes are returned
        The configuration returned is the default configuration used by the volumedriver of which in normal use-cases
        only the 1st entry is used, because at volume creation time, the volumedriver needs to create 1 master MDS
        During ensure_safety, we actually create/set the MDS slaves for each volume

        :param vpool: vPool to get storagedriver configuration for
        :type vpool: VPool

        :param check_online: Check whether the storage routers are actually responsive
        :type check_online: bool

        :return: MDS configuration for a vPool
        :rtype: dict
        """
        mds_per_storagerouter = {}
        mds_per_load = {}
        for storagedriver in vpool.storagedrivers:
            storagerouter = storagedriver.storagerouter
            if check_online is True:
                try:
                    SSHClient(storagerouter)
                except UnableToConnectException:
                    continue
            mds_service, load = MDSServiceController.get_preferred_mds(storagerouter, vpool)
            if mds_service is None:
                raise RuntimeError('Could not find an MDS service')
            mds_per_storagerouter[storagerouter] = {'host': storagerouter.ip, 'port': mds_service.service.ports[0]}
            if load not in mds_per_load:
                mds_per_load[load] = []
            mds_per_load[load].append(storagerouter)

        safety = Configuration.get('/ovs/framework/storagedriver|mds_safety')
        config_set = {}
        for storagerouter, ip_info in mds_per_storagerouter.iteritems():
            config_set[storagerouter.guid] = [ip_info]
            for importance in ['primary', 'secondary']:
                domains = [junction.domain for junction in storagerouter.domains if junction.backup is (importance == 'secondary')]
                possible_storagerouters = set()
                for domain in domains:
                    possible_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))

                for load in sorted(mds_per_load):
                    if len(config_set[storagerouter.guid]) >= safety:
                        break
                    other_storagerouters = mds_per_load[load]
                    random.shuffle(other_storagerouters)
                    for other_storagerouter in other_storagerouters:
                        if len(config_set[storagerouter.guid]) >= safety:
                            break
                        if other_storagerouter != storagerouter and other_storagerouter in possible_storagerouters:
                            config_set[storagerouter.guid].append(mds_per_storagerouter[other_storagerouter])
        return config_set
Пример #42
0
 def manage_running_tasks(tasklist, timesleep=10):
     """
     Manage a list of running celery task
     - discard PENDING tasks after a certain timeout
     - validate RUNNING tasks are actually running
     :param tasklist: Dictionary of tasks to wait {IP address: AsyncResult}
     :type tasklist: dict
     :param timesleep: leep between checks -
       -for long running tasks it's better to sleep for a longer period of time to reduce number of ssh calls
     :type timesleep: int
     :return: results
     :rtype: dict
     """
     logger = LogHandler.get('lib', name='celery toolbox')
     ssh_clients = {}
     tasks_pending = {}
     tasks_pending_timeout = 1800  # 30 minutes
     results = {}
     failed_nodes = []
     while len(tasklist.keys()) > 0:
         for ip, task in tasklist.items():
             if task.state in ('SUCCESS', 'FAILURE'):
                 logger.info('Task {0} finished: {1}'.format(task.id, task.state))
                 results[ip] = task.get(propagate=False)
                 del tasklist[ip]
             elif task.state == 'PENDING':
                 if task.id not in tasks_pending:
                     tasks_pending[task.id] = time.time()
                 else:
                     task_pending_since = tasks_pending[task.id]
                     if time.time() - task_pending_since > tasks_pending_timeout:
                         logger.warning('Task {0} is pending since {1} on node {2}. Task will be revoked'.format(task.id, datetime.datetime.fromtimestamp(task_pending_since), ip))
                         revoke(task.id)
                         del tasklist[ip]
                         del tasks_pending[task.id]
                         failed_nodes.append(ip)
             elif task.state == 'STARTED':
                 if ip not in ssh_clients:
                     ssh_clients[ip] = SSHClient(ip, username='******')
                 client = ssh_clients[ip]
                 if ServiceManager.get_service_status('workers', client) is False:
                     logger.error('Service ovs-workers on node {0} appears halted while there is a task PENDING for it {1}. Task will be revoked.'.format(ip, task.id))
                     revoke(task.id)
                     del tasklist[ip]
                     failed_nodes.append(ip)
                 else:
                     ping_result = task.app.control.inspect().ping()
                     storage_router = StorageRouterList.get_by_ip(ip)
                     if "celery@{0}".format(storage_router.name) not in ping_result:
                         logger.error('Service ovs-workers on node {0} is not reachable via rabbitmq while there is a task STARTED for it {1}. Task will be revoked.'.format(ip, task.id))
                         revoke(task.id)
                         del tasklist[ip]
                         failed_nodes.append(ip)
         if len(tasklist.keys()) > 0:
             time.sleep(timesleep)
     return results, failed_nodes
Пример #43
0
 def manage_running_tasks(tasklist, timesleep=10):
     """
     Manage a list of running celery task
     - discard PENDING tasks after a certain timeout
     - validate RUNNING tasks are actually running
     :param tasklist: Dictionary of tasks to wait {IP address: AsyncResult}
     :type tasklist: dict
     :param timesleep: leep between checks -
       -for long running tasks it's better to sleep for a longer period of time to reduce number of ssh calls
     :type timesleep: int
     :return: results
     :rtype: dict
     """
     logger = LogHandler.get('lib', name='celery toolbox')
     ssh_clients = {}
     tasks_pending = {}
     tasks_pending_timeout = 1800  # 30 minutes
     results = {}
     failed_nodes = []
     while len(tasklist.keys()) > 0:
         for ip, task in tasklist.items():
             if task.state in ('SUCCESS', 'FAILURE'):
                 logger.info('Task {0} finished: {1}'.format(task.id, task.state))
                 results[ip] = task.get(propagate=False)
                 del tasklist[ip]
             elif task.state == 'PENDING':
                 if task.id not in tasks_pending:
                     tasks_pending[task.id] = time.time()
                 else:
                     task_pending_since = tasks_pending[task.id]
                     if time.time() - task_pending_since > tasks_pending_timeout:
                         logger.warning('Task {0} is pending since {1} on node {2}. Task will be revoked'.format(task.id, datetime.datetime.fromtimestamp(task_pending_since), ip))
                         revoke(task.id)
                         del tasklist[ip]
                         del tasks_pending[task.id]
                         failed_nodes.append(ip)
             elif task.state == 'STARTED':
                 if ip not in ssh_clients:
                     ssh_clients[ip] = SSHClient(ip, username='******')
                 client = ssh_clients[ip]
                 if ServiceManager.get_service_status('workers', client) is False:
                     logger.error('Service ovs-workers on node {0} appears halted while there is a task PENDING for it {1}. Task will be revoked.'.format(ip, task.id))
                     revoke(task.id)
                     del tasklist[ip]
                     failed_nodes.append(ip)
                 else:
                     ping_result = task.app.control.inspect().ping()
                     storage_router = StorageRouterList.get_by_ip(ip)
                     if "celery@{0}".format(storage_router.name) not in ping_result:
                         logger.error('Service ovs-workers on node {0} is not reachable via rabbitmq while there is a task STARTED for it {1}. Task will be revoked.'.format(ip, task.id))
                         revoke(task.id)
                         del tasklist[ip]
                         failed_nodes.append(ip)
         if len(tasklist.keys()) > 0:
             time.sleep(timesleep)
     return results, failed_nodes
Пример #44
0
    def install_plugins():
        """
        (Re)load plugins
        """
        if ServiceManager.has_service('ovs-watcher-framework', SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            print 'Installing plugin into Open vStorage'
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = {}
            masters = StorageRouterList.get_masters()
            slaves = StorageRouterList.get_slaves()
            try:
                for sr in masters + slaves:
                    clients[sr] = SSHClient(sr, username='******')
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')
            memcached = 'memcached'
            watcher = 'watcher-framework'
            for sr in masters + slaves:
                if ServiceManager.has_service(watcher, clients[sr]):
                    print '- Stopping watcher on {0} ({1})'.format(sr.name, sr.ip)
                    ServiceManager.stop_service(watcher, clients[sr])
            for sr in masters:
                print '- Restarting memcached on {0} ({1})'.format(sr.name, sr.ip)
                ServiceManager.restart_service(memcached, clients[sr])
            for sr in masters + slaves:
                if ServiceManager.has_service(watcher, clients[sr]):
                    print '- Starting watcher on {0} ({1})'.format(sr.name, sr.ip)
                    ServiceManager.start_service(watcher, clients[sr])

            print '- Execute model migrations'
            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            if len(functions) > 0:
                print '- Execute post installation scripts'
            for function in functions:
                function(ip=ip)
            print 'Installing plugin into Open vStorage: Completed'
Пример #45
0
 def list(self, query=None):
     """
     Overview of all Storage Routers
     """
     if query is None:
         return StorageRouterList.get_storagerouters()
     else:
         query = json.loads(query)
         query_result = DataList({"object": StorageRouter, "data": DataList.select.GUIDS, "query": query}).data
         return DataObjectList(query_result, StorageRouter)
Пример #46
0
 def list(self, query=None):
     """
     Overview of all Storage Routers
     :param query: A query to filter the StorageRouters
     :type query: DataQuery
     """
     if query is None:
         return StorageRouterList.get_storagerouters()
     else:
         return DataList(StorageRouter, query)
Пример #47
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs
        :return: None
        """
        from ovs_extensions.generic.toolbox import ExtensionsToolbox

        GenericController._logger.info('Arakoon collapse started')
        cluster_info = []
        storagerouters = StorageRouterList.get_storagerouters()
        if os.environ.get('RUNNING_UNITTESTS') != 'True':
            cluster_info = [('cacc', storagerouters[0])]

        cluster_names = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                                     ServiceType.SERVICE_TYPES.NS_MGR,
                                                                     ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = ExtensionsToolbox.remove_prefix(service.name, 'arakoon-')
                if cluster in cluster_names and cluster not in [ARAKOON_NAME, ARAKOON_NAME_UNITTEST]:
                    continue
                cluster_names.append(cluster)
                cluster_info.append((cluster, service.storagerouter))
        workload = {}
        cluster_config_map = {}
        for cluster, storagerouter in cluster_info:
            GenericController._logger.debug('  Collecting info for cluster {0}'.format(cluster))
            ip = storagerouter.ip if cluster in [ARAKOON_NAME, ARAKOON_NAME_UNITTEST] else None
            try:
                config = ArakoonClusterConfig(cluster_id=cluster, source_ip=ip)
                cluster_config_map[cluster] = config
            except:
                GenericController._logger.exception('  Retrieving cluster information on {0} for {1} failed'.format(storagerouter.ip, cluster))
                continue
            for node in config.nodes:
                if node.ip not in workload:
                    workload[node.ip] = {'node_id': node.name,
                                         'clusters': []}
                workload[node.ip]['clusters'].append((cluster, ip))
        for storagerouter in storagerouters:
            try:
                if storagerouter.ip not in workload:
                    continue
                node_workload = workload[storagerouter.ip]
                client = SSHClient(storagerouter)
                for cluster, ip in node_workload['clusters']:
                    try:
                        GenericController._logger.debug('  Collapsing cluster {0} on {1}'.format(cluster, storagerouter.ip))
                        client.run(['arakoon', '--collapse-local', node_workload['node_id'], '2', '-config', cluster_config_map[cluster].external_config_path])
                        GenericController._logger.debug('  Collapsing cluster {0} on {1} completed'.format(cluster, storagerouter.ip))
                    except:
                        GenericController._logger.exception('  Collapsing cluster {0} on {1} failed'.format(cluster, storagerouter.ip))
            except UnableToConnectException:
                GenericController._logger.error('  Could not collapse any cluster on {0} (not reachable)'.format(storagerouter.name))
        GenericController._logger.info('Arakoon collapse finished')
Пример #48
0
    def get(self, request, *args, **kwargs):
        """
        Fetches metadata
        """
        _ = args, kwargs
        data = {'authenticated': False,
                'authentication_state': None,
                'username': None,
                'userguid': None,
                'roles': [],
                'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
                'versions': list(settings.VERSION),
                'plugins': {}}
        try:
            # Gather plugin metadata
            plugins = {}
            # - Backends. BackendType plugins must set the has_plugin flag on True
            for backend_type in BackendTypeList.get_backend_types():
                if backend_type.has_plugin is True:
                    if backend_type.code not in plugins:
                        plugins[backend_type.code] = []
                    plugins[backend_type.code] += ['backend', 'gui']
            data['plugins'] = plugins

            # Gather authorization metadata
            if 'HTTP_AUTHORIZATION' not in request.META:
                return HttpResponse, dict(data.items() + {'authentication_state': 'unauthenticated'}.items())
            authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
            if authorization_type != 'Bearer':
                return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_authorization_type'}.items())
            tokens = BearerTokenList.get_by_access_token(access_token)
            if len(tokens) != 1:
                return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_token'}.items())
            token = tokens[0]
            if token.expiration < time.time():
                for junction in token.roles.itersafe():
                    junction.delete()
                token.delete()
                return HttpResponse, dict(data.items() + {'authentication_state': 'token_expired'}.items())

            # Gather user metadata
            user = token.client.user
            if not user.is_active:
                return HttpResponse, dict(data.items() + {'authentication_state': 'inactive_user'}.items())
            roles = [j.role.code for j in token.roles]

            return HttpResponse, dict(data.items() + {'authenticated': True,
                                                      'authentication_state': 'authenticated',
                                                      'username': user.username,
                                                      'userguid': user.guid,
                                                      'roles': roles,
                                                      'plugins': plugins}.items())
        except Exception as ex:
            logger.exception('Unexpected exception: {0}'.format(ex))
            return HttpResponse, dict(data.items() + {'authentication_state': 'unexpected_exception'}.items())
Пример #49
0
 def get_scrub_storagerouters(self):
     """
     Loads a list of suitable StorageRouters for scrubbing the given vDisk
     """
     storagerouters = []
     for storagerouter in StorageRouterList.get_storagerouters():
         scrub_partitions = storagerouter.partition_config.get(DiskPartition.ROLES.SCRUB, [])
         if len(scrub_partitions) == 0:
             continue
         storagerouters.append(storagerouter)
     return storagerouters
Пример #50
0
 def get_my_storagerouter():
     """
     Returns unique machine storagerouter id
     :return: Storage Router this is executed on
     :rtype: StorageRouter
     """
     from ovs.dal.lists.storagerouterlist import StorageRouterList
     storagerouter = StorageRouterList.get_by_machine_id(System.get_my_machine_id())
     if storagerouter is None:
         raise RuntimeError('Could not find the local StorageRouter')
     return storagerouter
Пример #51
0
    def get_storagerouter_ips():
        """
         Fetch all the ip addresses in this cluster

         :return: list with storagerouter ips
         :rtype: list
         """
        return [
            storagerouter.ip
            for storagerouter in StorageRouterList.get_storagerouters()
        ]
    def _remote_stack(self):
        """
        Live list of information about remote linked OSDs of type ALBA BACKEND
        :return: Information about all linked OSDs
        :rtype: dict
        """
        # Import here to prevent from circular references
        from ovs.dal.hybrids.albaosd import AlbaOSD

        def _load_backend_info(_connection_info, _alba_backend_guid):
            client = OVSClient(ip=_connection_info['host'],
                               port=_connection_info['port'],
                               credentials=(_connection_info['username'], _connection_info['password']),
                               version=3)

            try:
                info = client.get('/alba/backends/{0}/'.format(_alba_backend_guid),
                                  params={'contents': 'local_summary'})
                with lock:
                    return_value[_alba_backend_guid].update(info['local_summary'])
            except NotFoundException:
                return_value[_alba_backend_guid]['error'] = 'backend_deleted'
            except ForbiddenException:
                return_value[_alba_backend_guid]['error'] = 'not_allowed'
            except Exception as ex:
                return_value[_alba_backend_guid]['error'] = 'unknown'
                AlbaBackend._logger.exception('Collecting remote ALBA backend information failed with error: {0}'.format(ex))

        # Retrieve local summaries of all related OSDs of type ALBA_BACKEND
        lock = Lock()
        threads = []
        return_value = {}
        cluster_ips = [sr.ip for sr in StorageRouterList.get_storagerouters()]
        for osd in self.osds:
            if osd.osd_type == AlbaOSD.OSD_TYPES.ALBA_BACKEND and osd.metadata is not None:
                backend_info = osd.metadata['backend_info']
                connection_info = osd.metadata['backend_connection_info']
                connection_host = connection_info['host']
                alba_backend_guid = backend_info['linked_guid']
                return_value[alba_backend_guid] = {'name': backend_info['linked_name'],
                                                   'error': '',
                                                   'domain': None if osd.domain is None else {'guid': osd.domain_guid,
                                                                                              'name': osd.domain.name},
                                                   'preset': backend_info['linked_preset'],
                                                   'osd_id': backend_info['linked_alba_id'],
                                                   'local_ip': connection_host in cluster_ips,
                                                   'remote_host': connection_host}
                thread = Thread(target=_load_backend_info, args=(connection_info, alba_backend_guid))
                thread.start()
                threads.append(thread)

        for thread in threads:
            thread.join()
        return return_value
Пример #53
0
 def monitor_mds_layout():
     """
     Prints the current MDS layout
     :return: None
     :rtype: NoneType
     """
     try:
         while True:
             output = [
                 '', 'Open vStorage - MDS debug information',
                 '=====================================',
                 'timestamp: {0}'.format(datetime.datetime.now()), ''
             ]
             vpools_deployed = False
             for storagerouter in sorted(
                     StorageRouterList.get_storagerouters(),
                     key=lambda k: k.name):
                 vpools = set(sd.vpool
                              for sd in storagerouter.storagedrivers)
                 if len(vpools) > 0:
                     vpools_deployed = True
                     output.append('+ {0} ({1})'.format(
                         storagerouter.name, storagerouter.ip))
                 for vpool in sorted(vpools, key=lambda k: k.name):
                     output.append('  + {0}'.format(vpool.name))
                     for mds_service in sorted(vpool.mds_services,
                                               key=lambda k: k.number):
                         if mds_service.service.storagerouter_guid == storagerouter.guid:
                             masters, slaves = 0, 0
                             for junction in mds_service.vdisks:
                                 if junction.is_master:
                                     masters += 1
                                 else:
                                     slaves += 1
                             capacity = mds_service.capacity
                             if capacity == -1:
                                 capacity = 'infinite'
                             load, _ = MDSServiceController.get_mds_load(
                                 mds_service)
                             if load == float('inf'):
                                 load = 'infinite'
                             else:
                                 load = '{0}%'.format(round(load, 2))
                             output.append(
                                 '    + {0} - port {1} - {2} master(s), {3} slave(s) - capacity: {4}, load: {5}'
                                 .format(mds_service.number,
                                         mds_service.service.ports[0],
                                         masters, slaves, capacity, load))
             if vpools_deployed is False:
                 output.append('No vPools deployed')
             print '\x1b[2J\x1b[H' + '\n'.join(output)
             time.sleep(1)
     except KeyboardInterrupt:
         pass
Пример #54
0
    def restart_framework_and_memcache_services(clients,
                                                logger,
                                                offline_node_ips=None):
        """
        Restart framework and Memcached services
        :param clients: Clients on which to restart these services
        :type clients: dict
        :param logger: Logger object used for logging
        :type logger: ovs.extensions.generic.logger.Logger
        :param offline_node_ips: IP addresses of offline nodes in the cluster
        :type offline_node_ips: list
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        service_manager = ServiceFactory.get_manager()
        master_ips = [sr.ip for sr in StorageRouterList.get_masters()]
        slave_ips = [sr.ip for sr in StorageRouterList.get_slaves()]
        if offline_node_ips is None:
            offline_node_ips = []
        memcached = 'memcached'
        watcher = 'watcher-framework'
        support_agent = 'support-agent'
        for ip in master_ips + slave_ips:
            if ip not in offline_node_ips:
                if service_manager.has_service(watcher, clients[ip]):
                    ServiceFactory.change_service_state(
                        clients[ip], watcher, 'stop', logger)
        for ip in master_ips:
            if ip not in offline_node_ips:
                ServiceFactory.change_service_state(clients[ip], memcached,
                                                    'restart', logger)
        for ip in master_ips + slave_ips:
            if ip not in offline_node_ips:
                if service_manager.has_service(watcher, clients[ip]):
                    ServiceFactory.change_service_state(
                        clients[ip], watcher, 'start', logger)
                if service_manager.has_service(support_agent, clients[ip]):
                    ServiceFactory.change_service_state(
                        clients[ip], support_agent, 'restart', logger)
        VolatileFactory.store = None
Пример #55
0
 def check_scrub_partition_present():
     """
     Checks whether at least 1 scrub partition is present on any StorageRouter
     :return: True if at least 1 SCRUB role present in the cluster else False
     :rtype: bool
     """
     for storage_router in StorageRouterList.get_storagerouters():
         for disk in storage_router.disks:
             for partition in disk.partitions:
                 if DiskPartition.ROLES.SCRUB in partition.roles:
                     return True
     return False
Пример #56
0
    def get_my_storagerouter():
        """
        Returns unique machine storagerouter id
        """

        from ovs.dal.hybrids.storagerouter import StorageRouter
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        if not System.my_storagerouter_guid:
            for storagerouter in StorageRouterList.get_storagerouters():
                if storagerouter.machine_id == System.get_my_machine_id():
                    System.my_storagerouter_guid = storagerouter.guid
        return StorageRouter(System.my_storagerouter_guid)
Пример #57
0
 def set_config_params(self, vdisk, new_config_params, version):
     """
     Sets configuration parameters to a given vdisk.
     :param vdisk: Guid of the virtual disk to configure
     :param new_config_params: Configuration settings for the virtual disk
     :param version: API version
     """
     if version == 1 and 'dtl_target' in new_config_params:
         storage_router = StorageRouterList.get_by_ip(new_config_params['dtl_target'])
         if storage_router is None:
             raise NotAcceptable('API version 1 requires a Storage Router IP')
         new_config_params['dtl_target'] = storage_router.primary_failure_domain.guid
     return VDiskController.set_config_params.delay(vdisk_guid=vdisk.guid, new_config_params=new_config_params)
Пример #58
0
 def process_response(self, request, response):
     """
     Processes responses
     """
     _ = self
     # Process CORS responses
     if 'HTTP_ORIGIN' in request.META:
         storagerouters = StorageRouterList.get_storagerouters()
         allowed_origins = ['https://{0}'.format(storagerouter.ip) for storagerouter in storagerouters]
         if request.META['HTTP_ORIGIN'] in allowed_origins:
             response['Access-Control-Allow-Origin'] = request.META['HTTP_ORIGIN']
             response['Access-Control-Allow-Headers'] = 'x-requested-with, content-type, accept, origin, authorization'
             response['Access-Control-Allow-Methods'] = 'GET, POST, PUT, PATCH, DELETE, OPTIONS'
     return response