Exemplo n.º 1
0
    def _configure_amqp_to_volumedriver():
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Update existing vPools')
        login = Configuration.get('/ovs/framework/messagequeue|user')
        password = Configuration.get('/ovs/framework/messagequeue|password')
        protocol = Configuration.get('/ovs/framework/messagequeue|protocol')

        uris = []
        for endpoint in Configuration.get(
                '/ovs/framework/messagequeue|endpoints'):
            uris.append({
                'amqp_uri':
                '{0}://{1}:{2}@{3}'.format(protocol, login, password, endpoint)
            })

        if Configuration.dir_exists('/ovs/vpools'):
            for vpool_guid in Configuration.list('/ovs/vpools'):
                for storagedriver_id in Configuration.list(
                        '/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                    storagedriver_config = StorageDriverConfiguration(
                        vpool_guid, storagedriver_id)
                    storagedriver_config.configure_event_publisher(
                        events_amqp_routing_key=Configuration.get(
                            '/ovs/framework/messagequeue|queues.storagedriver'
                        ),
                        events_amqp_uris=uris)
                    storagedriver_config.save()
Exemplo n.º 2
0
 def _configure_arakoon_to_volumedriver(cluster_name):
     StorageDriverController._logger.info('Update existing vPools')
     config = ArakoonClusterConfig(cluster_id=cluster_name)
     arakoon_nodes = []
     for node in config.nodes:
         arakoon_nodes.append({
             'host': node.ip,
             'port': node.client_port,
             'node_id': node.name
         })
     if Configuration.dir_exists('/ovs/vpools'):
         for vpool_guid in Configuration.list('/ovs/vpools'):
             for storagedriver_id in Configuration.list(
                     '/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                 storagedriver_config = StorageDriverConfiguration(
                     'storagedriver', vpool_guid, storagedriver_id)
                 storagedriver_config.load()
                 storagedriver_config.configure_volume_registry(
                     vregistry_arakoon_cluster_id=cluster_name,
                     vregistry_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.configure_distributed_lock_store(
                     dls_type='Arakoon',
                     dls_arakoon_cluster_id=cluster_name,
                     dls_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.save()
Exemplo n.º 3
0
    def validate_alba_backend_removal(alba_backend_info):
        """
        Validate whether the backend has been deleted properly
        alba_backend_info should be a dictionary containing:
            - guid
            - name
            - maintenance_service_names
        :param alba_backend_info: Information about the backend
        :return: None
        """
        Toolbox.verify_required_params(actual_params=alba_backend_info,
                                       required_params={'name': (str, None),
                                                        'guid': (str, Toolbox.regex_guid),
                                                        'maintenance_service_names': (list, None)},
                                       exact_match=True)

        alba_backend_guid = alba_backend_info['guid']
        alba_backend_name = alba_backend_info['name']
        backend = GeneralBackend.get_by_name(alba_backend_name)
        assert backend is None,\
            'Still found a backend in the model with name {0}'.format(alba_backend_name)

        # Validate services removed from model
        for service in GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR):
            assert service.name != '{0}-abm'.format(alba_backend_name),\
                'An AlbaManager service has been found with name {0}'.format(alba_backend_name)
        for service in GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.NS_MGR):
            assert service.name.startswith('{0}-nsm_'.format(alba_backend_name)) is False,\
                'An NamespaceManager service has been found with name {0}'.format(alba_backend_name)

        # Validate ALBA backend configuration structure
        alba_backend_key = '/ovs/alba/backends'
        actual_configuration_keys = [key for key in Configuration.list(alba_backend_key)]
        assert alba_backend_guid not in actual_configuration_keys,\
            'Configuration still contains an entry in {0} with guid {1}'.format(alba_backend_key, alba_backend_guid)

        # Validate Arakoon configuration structure
        arakoon_keys = [key for key in Configuration.list('/ovs/arakoon') if key.startswith(alba_backend_name)]
        assert len(arakoon_keys) == 0,\
            'Configuration still contains configurations for clusters: {0}'.format(', '.join(arakoon_keys))

        # Validate services
        for storagerouter in GeneralStorageRouter.get_storage_routers():
            root_client = SSHClient(endpoint=storagerouter, username='******')
            maintenance_services = alba_backend_info['maintenance_service_names']
            abm_arakoon_service_name = 'ovs-arakoon-{0}-abm'.format(alba_backend_name)
            nsm_arakoon_service_name = 'ovs-arakoon-{0}-nsm_0'.format(alba_backend_name)
            for service_name in [abm_arakoon_service_name, nsm_arakoon_service_name] + maintenance_services:
                assert GeneralService.has_service(name=service_name, client=root_client) is False,\
                    'Service {0} still deployed on Storage Router {1}'.format(service_name, storagerouter.name)
Exemplo n.º 4
0
    def get_unused_arakoon_metadata_and_claim(cluster_type, locked=True):
        """
        Retrieve arakoon cluster information based on its type
        :param cluster_type: Type of arakoon cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
        :type cluster_type: str
        :param locked: Execute this in a locked context
        :type locked: bool
        :return: Metadata of the arakoon cluster
        :rtype: dict
        """
        cluster_type = cluster_type.upper()
        if cluster_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError('Unsupported arakoon cluster type provided. Please choose from {0}'.format(', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))
        if not Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
            return None

        mutex = volatile_mutex('claim_arakoon_metadata', wait=10)
        try:
            if locked is True:
                mutex.acquire()

            for cluster_name in Configuration.list(ArakoonInstaller.CONFIG_ROOT):
                config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                config.load_config()
                arakoon_client = ArakoonInstaller.build_client(config)
                if arakoon_client.exists(ArakoonInstaller.METADATA_KEY):
                    metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if metadata['cluster_type'] == cluster_type and metadata['in_use'] is False and metadata['internal'] is False:
                        metadata['in_use'] = True
                        arakoon_client.set(ArakoonInstaller.METADATA_KEY, json.dumps(metadata, indent=4))
                        return metadata
        finally:
            if locked is True:
                mutex.release()
Exemplo n.º 5
0
    def discover_nodes(cls):
        # type: () -> Dict[str, AlbaNode]
        """
        Discover nodes by querying the config mgmt
        :return: The discovered nodes, mapped by their guid
        :rtype: Dict[str, AlbaNode]
        """
        nodes = {}
        model_node_ids = set(node.node_id
                             for node in AlbaNodeList.get_albanodes())
        found_node_ids = set()
        node_ids_by_type = {}
        for node_type, base_config_path in {
                AlbaNode.NODE_TYPES.ASD: ASD_NODE_BASE_PATH,
                AlbaNode.NODE_TYPES.S3: S3_NODE_BASE_PATH
        }.iteritems():
            if Configuration.dir_exists(base_config_path):
                node_ids = Configuration.list(base_config_path)
                node_ids_by_type[node_type] = node_ids

        for node_type, node_ids in node_ids_by_type.iteritems():
            for node_id in node_ids:
                if node_id not in model_node_ids and node_id not in found_node_ids:
                    node = cls.model_volatile_node(node_id, node_type)
                    nodes[node.guid] = node
                    found_node_ids.add(node.node_id)
        return nodes
    def remove_alba_arakoon_clusters(cls,
                                     alba_backend_guid,
                                     validate_clusters_reachable=True):
        # type: (basestring, bool) -> None
        """
        Removes all backend related Arakoon clusters
        :param alba_backend_guid: Guid of the ALBA Backend
        :type alba_backend_guid: str
        :param validate_clusters_reachable: Validate if all clusters are reachable
        :type validate_clusters_reachable: bool
        :return: None
        :rtype: NoneType
        """
        alba_backend = AlbaBackend(alba_backend_guid)
        if validate_clusters_reachable:
            AlbaArakoonController.abms_reachable(alba_backend)
            AlbaArakoonController.nsms_reachable(alba_backend)

        if alba_backend.abm_cluster is not None:
            AlbaArakoonController._logger.debug(
                'Removing clusters for ALBA Backend {0}'.format(
                    alba_backend.name))
            arakoon_clusters = list(Configuration.list('/ovs/arakoon'))
            # Remove the ABM cluster
            ABMInstaller.remove_abm_cluster(alba_backend.abm_cluster,
                                            arakoon_clusters)
            # Remove NSM Arakoon clusters and services
            for nsm_cluster in alba_backend.nsm_clusters:
                NSMInstaller.remove_nsm_cluster(nsm_cluster, arakoon_clusters)
 def _get_arakoon_clusters(cls, result_handler):
     """
     Retrieves all Arakoon clusters registered in this OVSCluster
     :param result_handler: Logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: Dict with the Arakoon cluster types as key and list with dicts which contain cluster names and pyrakoon clients
     :rtype: dict(str, list[dict])
     """
     result_handler.info('Fetching available arakoon clusters.', add_to_result=False)
     arakoon_clusters = {}
     for cluster_name in list(Configuration.list('/ovs/arakoon')) + ['cacc']:
         # Determine Arakoon type
         is_cacc = cluster_name == 'cacc'
         arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name, load_config=not is_cacc)
         if is_cacc is True:
             with open(Configuration.CACC_LOCATION) as config_file:
                 contents = config_file.read()
             arakoon_config.read_config(contents=contents)
         try:
             arakoon_client = ArakoonInstaller.build_client(arakoon_config)
         except (ArakoonNoMaster, ArakoonNoMasterResult) as ex:
             result_handler.failure('Unable to find a master for Arakoon cluster {0}. (Message: {1})'.format(cluster_name, str(ex)),
                                    code=ErrorCodes.master_none)
         except Exception as ex:
             msg = 'Unable to connect to Arakoon cluster {0}. (Message: {1})'.format(cluster_name, str(ex))
             result_handler.exception(msg, code=ErrorCodes.unhandled_exception)
             cls.logger.exception(msg)
             continue
         metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
         cluster_type = metadata['cluster_type']
         if cluster_type not in arakoon_clusters:
             arakoon_clusters[cluster_type] = []
         arakoon_clusters[cluster_type].append({'cluster_name': cluster_name, 'client': arakoon_client, 'config': arakoon_config})
     return arakoon_clusters
Exemplo n.º 8
0
 def model_albanodes(**kwargs):
     """
     Add all ALBA nodes known to the config platform to the model
     :param kwargs: Kwargs containing information regarding the node
     :type kwargs: dict
     :return: None
     :rtype: NoneType
     """
     _ = kwargs
     if Configuration.dir_exists('/ovs/alba/asdnodes'):
         for node_id in Configuration.list('/ovs/alba/asdnodes'):
             node = AlbaNodeList.get_albanode_by_node_id(node_id)
             if node is None:
                 node = AlbaNode()
             main_config = Configuration.get(
                 '/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
             node.type = 'ASD'
             node.node_id = node_id
             node.ip = main_config['ip']
             node.port = main_config['port']
             node.username = main_config['username']
             node.password = main_config['password']
             node.storagerouter = StorageRouterList.get_by_ip(
                 main_config['ip'])
             node.save()
Exemplo n.º 9
0
    def _configure_amqp_to_volumedriver():
        Toolbox.log(logger=NodeTypeController._logger, messages='Update existing vPools')
        login = Configuration.get('/ovs/framework/messagequeue|user')
        password = Configuration.get('/ovs/framework/messagequeue|password')
        protocol = Configuration.get('/ovs/framework/messagequeue|protocol')

        uris = []
        for endpoint in Configuration.get('/ovs/framework/messagequeue|endpoints'):
            uris.append({'amqp_uri': '{0}://{1}:{2}@{3}'.format(protocol, login, password, endpoint)})

        if Configuration.dir_exists('/ovs/vpools'):
            for vpool_guid in Configuration.list('/ovs/vpools'):
                for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                    storagedriver_config.load()
                    storagedriver_config.configure_event_publisher(events_amqp_routing_key=Configuration.get('/ovs/framework/messagequeue|queues.storagedriver'),
                                                                   events_amqp_uris=uris)
                    storagedriver_config.save()
Exemplo n.º 10
0
 def validate(*args, **kwargs):
     # check if arakoon cluster exists or not
     if kwargs['cluster_name']:
         if kwargs['cluster_name'] in list(Configuration.list('ovs/arakoon')):
             return func(*args, **kwargs)
         else:
             raise ArakoonClusterNotFoundError("Arakoon cluster does not exists: {0}".format(kwargs['cluster_name']))
     else:
         raise AttributeError("Missing parameter: cluster_name")
Exemplo n.º 11
0
 def _configure_arakoon_to_volumedriver(cluster_name):
     StorageDriverController._logger.info('Update existing vPools')
     config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
     config.load_config()
     arakoon_nodes = []
     for node in config.nodes:
         arakoon_nodes.append({'host': node.ip,
                               'port': node.client_port,
                               'node_id': node.name})
     if Configuration.dir_exists('/ovs/vpools'):
         for vpool_guid in Configuration.list('/ovs/vpools'):
             for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                 storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                 storagedriver_config.load()
                 storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id=cluster_name,
                                                                vregistry_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                       dls_arakoon_cluster_id=cluster_name,
                                                                       dls_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.save(reload_config=True)
Exemplo n.º 12
0
    def _get_clusters_residing_on_local_node(result_handler):
        """
        Fetches the available local arakoon clusters of a cluster
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: dict with the arakoon info
        :rtype: dict
        """
        result_handler.info('Fetching available arakoon clusters.', add_to_result=False)
        arakoon_clusters = list(Configuration.list('/ovs/arakoon'))

        present_nodes = {}
        missing_nodes = {}
        missing_tlog = {}
        cluster_results = {'present': present_nodes, 'missing': missing_nodes, 'tlog_missing': missing_tlog}
        if len(arakoon_clusters) == 0:
            # no arakoon clusters on node
            result_handler.warning('No installed arakoon clusters detected on this system.')
            return cluster_results

        # add arakoon clusters
        for cluster in arakoon_clusters:
            arakoon_config = ArakoonClusterConfig(str(cluster))
            master_node_ids = [node.name for node in arakoon_config.nodes]

            if ArakoonHealthCheck.LOCAL_SR.machine_id not in master_node_ids:
                continue
            # add node that is available for arakoon cluster
            nodes_per_cluster_result = {}
            missing_nodes_per_cluster = {}
            missing_tlog_per_cluster = {}

            tlog_dir = arakoon_config.export_dict()[ArakoonHealthCheck.LOCAL_SR.machine_id]['tlog_dir']
            for node_id in master_node_ids:
                machine = StoragerouterHelper.get_by_machine_id(node_id)
                if machine is None:
                    # No information found about the storagerouter - old value in arakoon
                    missing_nodes_per_cluster.update({node_id: tlog_dir})
                    result_handler.warning('Could not fetch storagerouter information about node {0} that was stored in Arakoon {1}'.format(node_id, cluster), add_to_result=False)
                elif not tlog_dir:
                    missing_tlog_per_cluster.update({node_id: tlog_dir})
                    result_handler.warning('Arakoon {1} seems to have no tlog_dir on this node.'.format(node_id, cluster), add_to_result=False)
                else:
                    nodes_per_cluster_result.update({node_id: tlog_dir})
            present_nodes[cluster] = nodes_per_cluster_result
            missing_nodes[cluster] = missing_nodes_per_cluster
            missing_tlog[cluster] = missing_tlog_per_cluster

        return cluster_results
Exemplo n.º 13
0
 def _get_arakoon_clusters(cls, result_handler):
     """
     Retrieves all Arakoon clusters registered in this OVSCluster
     :param result_handler: Logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: Dict with the Arakoon cluster types as key and list with dicts which contain cluster names and pyrakoon clients
     :rtype: dict(str, list[dict])
     """
     result_handler.info('Fetching available arakoon clusters.',
                         add_to_result=False)
     arakoon_clusters = {}
     for cluster_name in list(
             Configuration.list('/ovs/arakoon')) + ['cacc']:
         # Determine Arakoon type
         is_cacc = cluster_name == 'cacc'
         arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name,
                                               load_config=not is_cacc)
         if is_cacc is True:
             with open(Configuration.CACC_LOCATION) as config_file:
                 contents = config_file.read()
             arakoon_config.read_config(contents=contents)
         try:
             arakoon_client = ArakoonInstaller.build_client(arakoon_config)
         except (ArakoonNoMaster, ArakoonNoMasterResult) as ex:
             result_handler.failure(
                 'Unable to find a master for Arakoon cluster {0}. (Message: {1})'
                 .format(cluster_name, str(ex)),
                 code=ErrorCodes.master_none)
         except Exception as ex:
             msg = 'Unable to connect to Arakoon cluster {0}. (Message: {1})'.format(
                 cluster_name, str(ex))
             result_handler.exception(msg,
                                      code=ErrorCodes.unhandled_exception)
             cls.logger.exception(msg)
             continue
         metadata = json.loads(
             arakoon_client.get(ArakoonInstaller.METADATA_KEY))
         cluster_type = metadata['cluster_type']
         if cluster_type not in arakoon_clusters:
             arakoon_clusters[cluster_type] = []
         arakoon_clusters[cluster_type].append({
             'cluster_name': cluster_name,
             'client': arakoon_client,
             'config': arakoon_config
         })
     return arakoon_clusters
Exemplo n.º 14
0
 def _resave_all_config_entries(config_path='/ovs'):
     """
     Recursive functions which checks every config management key if its a directory or not.
     If not a directory, we retrieve the config and just save it again using the new indentation logic
     """
     for item in Configuration.list(config_path):
         new_path = config_path + '/' + item
         print new_path
         if Configuration.dir_exists(new_path) is True:
             _resave_all_config_entries(config_path=new_path)
         else:
             try:
                 _config = Configuration.get(new_path)
                 Configuration.set(new_path, _config)
             except:
                 _config = Configuration.get(new_path, raw=True)
                 Configuration.set(new_path, _config, raw=True)
Exemplo n.º 15
0
    def _get_free_ports(client):
        node_name = System.get_my_machine_id(client)
        clusters = []
        exclude_ports = []
        if Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
            for cluster_name in Configuration.list(ArakoonInstaller.CONFIG_ROOT):
                config = ArakoonClusterConfig(cluster_name, False)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)

        ports = System.get_free_ports(Configuration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)), exclude_ports, 2, client)
        ArakoonInstaller._logger.debug('  Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
        return ports
Exemplo n.º 16
0
    def check_model_consistency(result_handler):
        """
        Verifies the information in the model
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        def dict_compare(dict1, dict2):
            d1_keys = set(dict1.keys())
            d2_keys = set(dict2.keys())
            intersect_keys = d1_keys.intersection(d2_keys)
            added = d1_keys - d2_keys
            removed = d2_keys - d1_keys
            modified = {key: (dict1[key], dict2[key]) for key in intersect_keys if dict1[key] != dict2[key]}
            same = set(key for key in intersect_keys if dict1[key] == dict2[key])
            return {'added': added, 'removed': removed, 'modified': modified, 'same': same}

        result_handler.info("Verifying arakoon information.", add_to_result=False)
        dal_ports = {}
        arakoon_ports = {}
        for service in ServiceHelper.get_local_arakoon_services():
            dal_ports[service.name] = service.ports
        for arakoon_cluster in Configuration.list('/ovs/arakoon'):
            e = Configuration.get('/ovs/arakoon/{0}/config'.format(arakoon_cluster), raw=True)
            config = ConfigParser.RawConfigParser()
            config.readfp(StringIO(e))
            for section in config.sections():
                if section == ArakoonHealthCheck.LOCAL_SR.machine_id:
                    process_name = "arakoon-{0}".format(arakoon_cluster)
                    arakoon_ports[process_name] = [int(config.get(section, 'client_port')), int(config.get(section, 'messaging_port'))]  # cast port strings to int
                    break
        diff = dict_compare(dal_ports, arakoon_ports)
        if len(diff['added']) > 0 or len(diff['removed']) > 0:
            if len(diff['added']) > 0:
                result_handler.warning('Found {0} in DAL but not in Arakoon.'.format(diff['added']))
            if len(diff['removed']) > 0:
                result_handler.warning('Found {0} in Arakoon but not in DAL.'.format(diff['removed']))
        else:
            result_handler.success('Arakoon info for DAL and Arakoon are the same.')
        if len(diff['modified']) > 0:
            result_handler.warning('The following items have changed: {0}.'.format(diff['modified']))
        else:
            result_handler.success('No items have changed.')
Exemplo n.º 17
0
    def get_unused_arakoon_metadata_and_claim(cluster_type, locked=True):
        """
        Retrieve arakoon cluster information based on its type
        :param cluster_type: Type of arakoon cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
        :type cluster_type: str
        :param locked: Execute this in a locked context
        :type locked: bool
        :return: Metadata of the arakoon cluster
        :rtype: dict
        """
        cluster_type = cluster_type.upper()
        if cluster_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError(
                'Unsupported arakoon cluster type provided. Please choose from {0}'
                .format(', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))
        if not Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
            return None

        mutex = volatile_mutex('claim_arakoon_metadata', wait=10)
        try:
            if locked is True:
                mutex.acquire()

            for cluster_name in Configuration.list(
                    ArakoonInstaller.CONFIG_ROOT):
                config = ArakoonClusterConfig(cluster_id=cluster_name,
                                              filesystem=False)
                config.load_config()
                arakoon_client = ArakoonInstaller.build_client(config)
                if arakoon_client.exists(ArakoonInstaller.METADATA_KEY):
                    metadata = json.loads(
                        arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if metadata['cluster_type'] == cluster_type and metadata[
                            'in_use'] is False and metadata[
                                'internal'] is False:
                        metadata['in_use'] = True
                        arakoon_client.set(ArakoonInstaller.METADATA_KEY,
                                           json.dumps(metadata, indent=4))
                        return metadata
        finally:
            if locked is True:
                mutex.release()
Exemplo n.º 18
0
    def add_alba_backend(name, scaling='LOCAL', wait=True):
        """
        Put an ALBA backend in the model
        :param name: Name of the backend
        :param scaling: Alba backend can be LOCAL or GLOBAL
        :param wait: Wait for backend to enter RUNNING state
        :return: Newly created ALBA backend
        """
        alba_backend = GeneralAlba.get_by_name(name)
        if alba_backend is None:
            backend = GeneralBackend.add_backend(name, 'alba')
            alba_backend = AlbaBackend(GeneralAlba.api.add('alba/backends', {'backend_guid': backend.guid,
                                                                             'scaling': scaling})['guid'])
            if wait is True:
                GeneralAlba.wait_for_alba_backend_status(alba_backend)

        alba_nodes = [alba_node for alba_node in Configuration.list('/ovs/alba/asdnodes')]
        if len(alba_nodes):
            AlbaNodeController.model_albanodes()

        return GeneralAlba.get_by_name(name)
Exemplo n.º 19
0
 def model_albanodes(**kwargs):
     """
     Add all ALBA nodes known to the config platform to the model
     :param kwargs: Kwargs containing information regarding the node
     :type kwargs: dict
     :return: None
     """
     _ = kwargs
     if Configuration.dir_exists('/ovs/alba/asdnodes'):
         for node_id in Configuration.list('/ovs/alba/asdnodes'):
             node = AlbaNodeList.get_albanode_by_node_id(node_id)
             if node is None:
                 node = AlbaNode()
             main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
             node.type = 'ASD'
             node.node_id = node_id
             node.ip = main_config['ip']
             node.port = main_config['port']
             node.username = main_config['username']
             node.password = main_config['password']
             node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
             node.save()
Exemplo n.º 20
0
    def _get_free_ports(client):
        node_name = System.get_my_machine_id(client)
        clusters = []
        exclude_ports = []
        if Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
            for cluster_name in Configuration.list(
                    ArakoonInstaller.CONFIG_ROOT):
                config = ArakoonClusterConfig(cluster_name, False)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)

        ports = System.get_free_ports(
            Configuration.get(
                '/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)),
            exclude_ports, 2, client)
        ArakoonInstaller._logger.debug(
            '  Loaded free ports {0} based on existing clusters {1}'.format(
                ports, clusters))
        return ports
Exemplo n.º 21
0
    def check_vpool_cleanup(vpool_info, storagerouters=None):
        """
        Check if everything related to a vPool has been cleaned up on the storagerouters provided
        vpool_info should be a dictionary containing:
            - type
            - guid
            - files
            - directories
            - name (optional)
            - vpool (optional)
            If vpool is provided:
                - storagerouters need to be provided, because on these Storage Routers, we check whether the vPool has been cleaned up
            If name is provided:
                - If storagerouters is NOT provided, all Storage Routers will be checked for a correct vPool removal
                - If storagerouters is provided, only these Storage Routers will be checked for a correct vPool removal

        :param vpool_info: Information about the vPool
        :param storagerouters: Storage Routers to check if vPool has been cleaned up
        :return: None
        """
        for required_param in ["type", "guid", "files", "directories"]:
            if required_param not in vpool_info:
                raise ValueError("Incorrect vpool_info provided")
        if "vpool" in vpool_info and "name" in vpool_info:
            raise ValueError("vpool and name are mutually exclusive")
        if "vpool" not in vpool_info and "name" not in vpool_info:
            raise ValueError("Either vpool or vpool_name needs to be provided")

        vpool = vpool_info.get("vpool")
        vpool_name = vpool_info.get("name")
        vpool_guid = vpool_info["guid"]
        vpool_type = vpool_info["type"]
        files = vpool_info["files"]
        directories = vpool_info["directories"]

        supported_backend_types = GeneralBackend.get_valid_backendtypes()
        if vpool_type not in supported_backend_types:
            raise ValueError(
                "Unsupported Backend Type provided. Please choose from: {0}".format(", ".join(supported_backend_types))
            )
        if storagerouters is None:
            storagerouters = GeneralStorageRouter.get_storage_routers()

        if vpool_name is not None:
            assert (
                GeneralVPool.get_vpool_by_name(vpool_name=vpool_name) is None
            ), "A vPool with name {0} still exists".format(vpool_name)

        # Prepare some fields to check
        vpool_name = vpool.name if vpool else vpool_name
        vpool_services = ["ovs-dtl_{0}".format(vpool_name), "ovs-volumedriver_{0}".format(vpool_name)]
        if vpool_type == "alba":
            vpool_services.append("ovs-albaproxy_{0}".format(vpool_name))

        # Check configuration
        if vpool is None:
            assert (
                Configuration.exists("/ovs/vpools/{0}".format(vpool_guid), raw=True) is False
            ), "vPool config still found in etcd"
        else:
            remaining_sd_ids = set([storagedriver.storagedriver_id for storagedriver in vpool.storagedrivers])
            current_sd_ids = set([item for item in Configuration.list("/ovs/vpools/{0}/hosts".format(vpool_guid))])
            assert not remaining_sd_ids.difference(
                current_sd_ids
            ), "There are more storagedrivers modelled than present in etcd"
            assert not current_sd_ids.difference(
                remaining_sd_ids
            ), "There are more storagedrivers in etcd than present in model"

        # Perform checks on all storagerouters where vpool was removed
        for storagerouter in storagerouters:

            # Check MDS services
            mds_services = GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
            assert (
                len(
                    [
                        mds_service
                        for mds_service in mds_services
                        if mds_service.storagerouter_guid == storagerouter.guid
                    ]
                )
                == 0
            ), "There are still MDS services present for Storage Router {0}".format(storagerouter.ip)

            # Check services
            root_client = SSHClient(storagerouter, username="******")
            for service in vpool_services:
                if ServiceManager.has_service(service, client=root_client):
                    raise RuntimeError(
                        "Service {0} is still configured on Storage Router {1}".format(service, storagerouter.ip)
                    )

            # Check KVM vpool
            if GeneralHypervisor.get_hypervisor_type() == "KVM":
                vpool_overview = root_client.run(["virsh", "pool-list", "--all"]).splitlines()
                vpool_overview.pop(1)
                vpool_overview.pop(0)
                for vpool_info in vpool_overview:
                    kvm_vpool_name = vpool_info.split()[0].strip()
                    if vpool_name == kvm_vpool_name:
                        raise ValueError(
                            "vPool {0} is still defined on Storage Router {1}".format(vpool_name, storagerouter.ip)
                        )

            # Check file and directory existence
            if storagerouter.guid not in directories:
                raise ValueError("Could not find directory information for Storage Router {0}".format(storagerouter.ip))
            if storagerouter.guid not in files:
                raise ValueError("Could not find file information for Storage Router {0}".format(storagerouter.ip))

            for directory in directories[storagerouter.guid]:
                assert (
                    root_client.dir_exists(directory) is False
                ), "Directory {0} still exists on Storage Router {1}".format(directory, storagerouter.ip)
            for file_name in files[storagerouter.guid]:
                assert (
                    root_client.file_exists(file_name) is False
                ), "File {0} still exists on Storage Router {1}".format(file_name, storagerouter.ip)

            # Look for errors in storagedriver log
            for error_type in ["error", "fatal"]:
                cmd = "cat -vet /var/log/ovs/volumedriver/{0}.log | tail -1000 | grep ' {1} '; echo true > /dev/null".format(
                    vpool_name, error_type
                )
                errors = []
                for line in root_client.run(cmd, allow_insecure=True).splitlines():
                    if "HierarchicalArakoon" in line:
                        continue
                    errors.append(line)
                if len(errors) > 0:
                    if error_type == "error":
                        print "Volumedriver log file contains errors on Storage Router {0}\n - {1}".format(
                            storagerouter.ip, "\n - ".join(errors)
                        )
                    else:
                        raise RuntimeError(
                            "Fatal errors found in volumedriver log file on Storage Router {0}\n - {1}".format(
                                storagerouter.ip, "\n - ".join(errors)
                            )
                        )
Exemplo n.º 22
0
    def services_running(self, target):
        """
        Check all services are running
        :param target: Target to check
        :return: Boolean
        """
        try:
            key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
            value = str(time.time())

            if target in ['config', 'framework']:
                self.log_message(target, 'Testing configuration store...', 0)
                from ovs.extensions.generic.configuration import Configuration
                try:
                    Configuration.list('/')
                except Exception as ex:
                    self.log_message(
                        target,
                        '  Error during configuration store test: {0}'.format(
                            ex), 2)
                    return False

                from ovs.extensions.db.arakooninstaller import ArakoonInstaller, ArakoonClusterConfig
                from ovs_extensions.db.arakoon.pyrakoon.pyrakoon.compat import NoGuarantee
                from ovs.extensions.generic.configuration import Configuration
                with open(Configuration.CACC_LOCATION) as config_file:
                    contents = config_file.read()
                config = ArakoonClusterConfig(
                    cluster_id=Configuration.ARAKOON_NAME, load_config=False)
                config.read_config(contents=contents)
                client = ArakoonInstaller.build_client(config)
                contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY,
                                      consistency=NoGuarantee())
                if Watcher.LOG_CONTENTS != contents:
                    try:
                        config.read_config(
                            contents=contents
                        )  # Validate whether the contents are not corrupt
                    except Exception as ex:
                        self.log_message(
                            target,
                            '  Configuration stored in configuration store seems to be corrupt: {0}'
                            .format(ex), 2)
                        return False
                    temp_filename = '{0}~'.format(Configuration.CACC_LOCATION)
                    with open(temp_filename, 'w') as config_file:
                        config_file.write(contents)
                        config_file.flush()
                        os.fsync(config_file)
                    os.rename(temp_filename, Configuration.CACC_LOCATION)
                    Watcher.LOG_CONTENTS = contents
                self.log_message(target, '  Configuration store OK', 0)

            if target == 'framework':
                # Volatile
                self.log_message(target, 'Testing volatile store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            from ovs.extensions.storage.volatilefactory import VolatileFactory
                            VolatileFactory.store = None
                            volatile = VolatileFactory.get_client()
                            volatile.set(key, value)
                            if volatile.get(key) == value:
                                volatile.delete(key)
                                break
                            volatile.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during volatile store test: {0}'.format(
                                message), 2)
                    key = 'ovs-watcher-{0}'.format(str(
                        uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target,
                                     '  Volatile store not working correctly',
                                     2)
                    return False
                self.log_message(
                    target,
                    '  Volatile store OK after {0} tries'.format(tries), 0)

                # Persistent
                self.log_message(target, 'Testing persistent store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            persistent = PersistentFactory.get_client()
                            persistent.nop()
                            break
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during persistent store test: {0}'.format(
                                message), 2)
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(
                        target, '  Persistent store not working correctly', 2)
                    return False
                self.log_message(
                    target,
                    '  Persistent store OK after {0} tries'.format(tries), 0)

            if target == 'volumedriver':
                # Arakoon, voldrv cluster
                self.log_message(target, 'Testing arakoon (voldrv)...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        from ovs.extensions.generic.configuration import Configuration
                        from ovs_extensions.storage.persistent.pyrakoonstore import PyrakoonStore
                        cluster_name = str(
                            Configuration.get(
                                '/ovs/framework/arakoon_clusters|voldrv'))
                        configuration = Configuration.get(
                            '/ovs/arakoon/{0}/config'.format(cluster_name),
                            raw=True)
                        client = PyrakoonStore(cluster=cluster_name,
                                               configuration=configuration)
                        client.nop()
                        break
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during arakoon (voldrv) test: {0}'.format(
                                message), 2)
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(
                        target, '  Arakoon (voldrv) not working correctly', 2)
                    return False
                self.log_message(target, '  Arakoon (voldrv) OK', 0)

            if target in ['framework', 'volumedriver']:
                # RabbitMQ
                self.log_message(target, 'Test rabbitMQ...', 0)
                import pika
                from ovs.extensions.generic.configuration import Configuration
                messagequeue = Configuration.get('/ovs/framework/messagequeue')
                rmq_servers = messagequeue['endpoints']
                good_node = False
                for server in rmq_servers:
                    try:
                        connection_string = '{0}://{1}:{2}@{3}/%2F'.format(
                            messagequeue['protocol'], messagequeue['user'],
                            messagequeue['password'], server)
                        connection = pika.BlockingConnection(
                            pika.URLParameters(connection_string))
                        channel = connection.channel()
                        channel.basic_publish(
                            '', 'ovs-watcher', str(time.time()),
                            pika.BasicProperties(content_type='text/plain',
                                                 delivery_mode=1))
                        connection.close()
                        good_node = True
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during rabbitMQ test on node {0}: {1}'.
                            format(server, message), 2)
                if good_node is False:
                    self.log_message(
                        target, '  No working rabbitMQ node could be found', 2)
                    return False
                self.log_message(target, '  RabbitMQ test OK', 0)
                self.log_message(target, 'All tests OK', 0)

            return True
        except Exception as ex:
            self.log_message(target, 'Unexpected exception: {0}'.format(ex), 2)
            return False
Exemplo n.º 23
0
    def promote_or_demote_node(node_action,
                               cluster_ip=None,
                               execute_rollback=False):
        """
        Promotes or demotes the local node
        :param node_action: Demote or promote
        :type node_action: str
        :param cluster_ip: IP of node to promote or demote
        :type cluster_ip: str
        :param execute_rollback: In case of failure revert the changes made
        :type execute_rollback: bool
        :return: None
        """

        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Open vStorage Setup - {0}'.format(
                        node_action.capitalize()),
                    boxed=True)
        try:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Collecting information',
                        title=True)

            machine_id = System.get_my_machine_id()
            if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.
                                 format(machine_id)) is False:
                raise RuntimeError('No local OVS setup found.')

            if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
                raise RuntimeError(
                    'Incorrect IP provided ({0})'.format(cluster_ip))

            if cluster_ip:
                client = SSHClient(endpoint=cluster_ip)
                machine_id = System.get_my_machine_id(client)

            node_type = Configuration.get(
                '/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_action == 'promote' and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            elif node_action == 'demote' and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')
            elif node_type not in ['MASTER', 'EXTRA']:
                raise RuntimeError('This node is not correctly configured.')

            master_ip = None
            offline_nodes = []

            online = True
            target_client = None
            if node_action == 'demote' and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                ip_client_map = {}
                for storage_router in StorageRouterList.get_storagerouters():
                    try:
                        client = SSHClient(storage_router.ip, username='******')
                        if storage_router.node_type == 'MASTER':
                            master_ip = storage_router.ip
                        ip_client_map[storage_router.ip] = client
                    except UnableToConnectException:
                        if storage_router.ip == cluster_ip:
                            online = False
                            unique_id = storage_router.machine_id
                            StorageDriverController.mark_offline(
                                storagerouter_guid=storage_router.guid)
                        offline_nodes.append(storage_router)
                if online is True:
                    raise RuntimeError(
                        "If the node is online, please use 'ovs setup demote' executed on the node you wish to demote"
                    )
                if master_ip is None:
                    raise RuntimeError(
                        'Failed to retrieve another responsive MASTER node')

            else:
                target_password = Toolbox.ask_validate_password(
                    ip='127.0.0.1', logger=NodeTypeController._logger)
                target_client = SSHClient('127.0.0.1',
                                          username='******',
                                          password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get(
                    '/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(
                    ip=target_client.ip, password=target_password)
                node_ips = [
                    sr_info['ip']
                    for sr_info in storagerouter_info.itervalues()
                ]
                master_node_ips = [
                    sr_info['ip']
                    for sr_info in storagerouter_info.itervalues()
                    if sr_info['type'] == 'master' and sr_info['ip'] != ip
                ]
                if len(master_node_ips) == 0:
                    if node_action == 'promote':
                        raise RuntimeError('No master node could be found')
                    else:
                        raise RuntimeError(
                            'It is not possible to remove the only master')

                master_ip = master_node_ips[0]
                ip_client_map = dict(
                    (node_ip, SSHClient(node_ip, username='******'))
                    for node_ip in node_ips)

            if node_action == 'demote':
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_id=cluster_name)
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(
                        arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if len(config.nodes) == 1 and config.nodes[
                            0].ip == ip and metadata.get('internal') is True:
                        raise RuntimeError(
                            'Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.'
                        )

            configure_rabbitmq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(
                service='memcached')
            if node_action == 'promote':
                try:
                    NodeTypeController.promote_node(
                        cluster_ip=ip,
                        master_ip=master_ip,
                        ip_client_map=ip_client_map,
                        unique_id=unique_id,
                        configure_memcached=configure_memcached,
                        configure_rabbitmq=configure_rabbitmq)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.demote_node(
                            cluster_ip=ip,
                            master_ip=master_ip,
                            ip_client_map=ip_client_map,
                            unique_id=unique_id,
                            unconfigure_memcached=configure_memcached,
                            unconfigure_rabbitmq=configure_rabbitmq,
                            offline_nodes=offline_nodes)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'demote')
                    raise
            else:
                try:
                    NodeTypeController.demote_node(
                        cluster_ip=ip,
                        master_ip=master_ip,
                        ip_client_map=ip_client_map,
                        unique_id=unique_id,
                        unconfigure_memcached=configure_memcached,
                        unconfigure_rabbitmq=configure_rabbitmq,
                        offline_nodes=offline_nodes)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.promote_node(
                            cluster_ip=ip,
                            master_ip=master_ip,
                            ip_client_map=ip_client_map,
                            unique_id=unique_id,
                            configure_memcached=configure_memcached,
                            configure_rabbitmq=configure_rabbitmq)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback',
                                                 'promote')
                    raise

            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='{0} complete.'.format(
                            node_action.capitalize()),
                        boxed=True)
        except Exception as exception:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)
Exemplo n.º 24
0
    def test_alba_arakoon_checkup(self):
        """
        Validates whether the ALBA Arakoon checkup works (Manual and Scheduled)
        """
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})

        #############################
        # SCHEDULED_ARAKOON_CHECKUP #
        #############################
        # Create an ABM and NSM cluster for ALBA Backend 1 and do some basic validations
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        abm_cluster_name = '{0}-abm'.format(ab_1.name)
        nsm_cluster_name = '{0}-nsm_0'.format(ab_1.name)
        arakoon_clusters = sorted(Configuration.list('/ovs/arakoon'))
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)

        abm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=abm_cluster_name)
        nsm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=nsm_cluster_name)
        self.assertTrue(expr=abm_metadata['in_use'])
        self.assertTrue(expr=nsm_metadata['in_use'])

        # Run scheduled Arakoon checkup and validate amount of Arakoon clusters did not change
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Create 2 additional StorageRouters
        srs = DalHelper.build_dal_structure(
            structure={'storagerouters': [2, 3]},
            previous_structure=ovs_structure)['storagerouters']
        sr_2 = srs[2]
        sr_3 = srs[3]

        # Run scheduled checkup again and do some validations
        MockedSSHClient._run_returns[sr_2.ip] = {}
        MockedSSHClient._run_returns[sr_3.ip] = {}
        MockedSSHClient._run_returns[sr_2.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_3/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_2.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_2.machine_id)] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_3.machine_id)] = None
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services),
                         second=3)  # Gone up from 1 to 3
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)  # Still 1 since NSM checkup hasn't ran yet

        # Make sure 1 StorageRouter is unreachable
        SSHClient._raise_exceptions[sr_3.ip] = {
            'users': ['ovs'],
            'exception': UnableToConnectException('No route to host')
        }
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        alba_logs = Logger._logs.get('lib', [])
        self.assertIn(
            member='Storage Router with IP {0} is not reachable'.format(
                sr_3.ip),
            container=alba_logs)

        ##########################
        # MANUAL_ARAKOON_CHECKUP #
        ##########################
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        # Run manual Arakoon checkup and validate amount of Arakoon clusters did not change
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid, nsm_clusters=[], abm_cluster=None)
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Test some error paths
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['no_abm_cluster_passed'])
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[],
                abm_cluster='no_nsm_clusters_passed')
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[nsm_cluster_name],
                abm_cluster=abm_cluster_name)
        self.assertEqual(first=raise_info.exception.message,
                         second='Cluster {0} has already been claimed'.format(
                             abm_cluster_name))
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['non-existing-nsm-cluster'],
                abm_cluster='non-existing-abm-cluster')
        self.assertEqual(
            first=raise_info.exception.message,
            second=
            'Could not find an Arakoon cluster with name: non-existing-abm-cluster'
        )

        # Recreate ALBA Backend with Arakoon clusters
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]

        # Create some Arakoon clusters to be claimed by the manual checkup
        for cluster_name, cluster_type in {
                'manual-abm-1': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-abm-2': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-nsm-1': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-2': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-3': ServiceType.ARAKOON_CLUSTER_TYPES.NSM
        }.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(
                cluster_type=cluster_type,
                ip=sr_1.ip,
                base_dir=DalHelper.CLUSTER_DIR.format(cluster_name),
                internal=False)
            arakoon_installer.start_cluster()
            arakoon_installer.unclaim_cluster()
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid,
            nsm_clusters=['manual-nsm-1', 'manual-nsm-3'],
            abm_cluster='manual-abm-2')

        # Validate the correct clusters have been claimed by the manual checkup
        unused_abms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.ABM)
        unused_nsms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM)
        self.assertEqual(first=len(unused_abms), second=1)
        self.assertEqual(first=len(unused_nsms), second=1)
        self.assertEqual(first=unused_abms[0]['cluster_name'],
                         second='manual-abm-1')
        self.assertEqual(first=unused_nsms[0]['cluster_name'],
                         second='manual-nsm-2')
Exemplo n.º 25
0
 def _cluster_node_config(self):
     """
     Prepares a ClusterNodeConfig dict for the StorageDriver process
     """
     from ovs.extensions.generic.configuration import Configuration, NotFoundException
     rdma = Configuration.get('/ovs/framework/rdma')
     distance_map = {}
     primary_domains = []
     secondary_domains = []
     for junction in self.storagerouter.domains:
         if junction.backup is False:
             primary_domains.append(junction.domain_guid)
         else:
             secondary_domains.append(junction.domain_guid)
     # @todo implement more race-conditions guarantees. Current guarantee is the single update invalidating the value
     # through cluster_registry_checkup
     try:
         storagerouters_marked_for_update = list(
             Configuration.list(VPOOL_UPDATE_KEY))
     except NotFoundException:
         storagerouters_marked_for_update = []
     for sd in self.vpool.storagedrivers:
         if sd.guid == self.guid:
             continue
         if sd.storagerouter_guid in storagerouters_marked_for_update:
             distance_map[str(
                 sd.storagedriver_id)] = StorageDriver.DISTANCES.FAR
         elif len(primary_domains) == 0:
             distance_map[str(
                 sd.storagedriver_id)] = StorageDriver.DISTANCES.NEAR
         else:
             distance = StorageDriver.DISTANCES.INFINITE
             for junction in sd.storagerouter.domains:
                 if junction.backup is False:
                     if junction.domain_guid in primary_domains:
                         distance = min(distance,
                                        StorageDriver.DISTANCES.NEAR)
                         break  # We can break here since we reached the minimum distance
                     elif junction.domain_guid in secondary_domains:
                         distance = min(distance,
                                        StorageDriver.DISTANCES.FAR)
             distance_map[str(sd.storagedriver_id)] = distance
     return {
         'vrouter_id':
         self.storagedriver_id,
         'host':
         self.storage_ip,
         'message_port':
         self.ports['management'],
         'xmlrpc_host':
         self.cluster_ip,
         'xmlrpc_port':
         self.ports['xmlrpc'],
         'failovercache_host':
         self.storage_ip,
         'failovercache_port':
         self.ports['dtl'],
         'network_server_uri':
         '{0}://{1}:{2}'.format('rdma' if rdma else 'tcp', self.storage_ip,
                                self.ports['edge']),
         'node_distance_map':
         distance_map
     }
    def validate_arakoon_config_files(storagerouters, cluster_name=None):
        """
        Verify whether all arakoon configurations are correct
        :param storagerouters: Storage Routers
        :param cluster_name: Name of the Arakoon cluster
        :return:
        """
        storagerouters.sort(key=lambda k: k.ip)
        TestArakoon.logger.info('Validating arakoon files for {0}'.format(', '.join([sr.ip for sr in storagerouters])))

        nr_of_configs_on_master = 0
        nr_of_configs_on_extra = 0

        node_ids = dict()
        matrix = dict()
        for sr in storagerouters:
            node_ids[sr.ip] = sr.machine_id
            configs_to_check = []
            matrix[sr] = dict()
            if cluster_name is not None:
                if Configuration.exists(GeneralArakoon.CONFIG_KEY.format(cluster_name), raw=True):
                    configs_to_check = [GeneralArakoon.CONFIG_KEY.format(cluster_name)]
            else:
                gen = Configuration.list(GeneralArakoon.CONFIG_ROOT)
                for entry in gen:
                    if 'nsm_' not in entry:
                        if Configuration.exists(GeneralArakoon.CONFIG_KEY.format(cluster_name), raw=True):
                            configs_to_check.append(GeneralArakoon.CONFIG_KEY.format(entry))
            for config_name in configs_to_check:
                config_contents = Configuration.get(configs_to_check[0], raw=True)
                matrix[sr][config_name] = hashlib.md5(config_contents).hexdigest()
            if sr.node_type == 'MASTER':
                nr_of_configs_on_master = len(matrix[sr])
            else:
                nr_of_configs_on_extra = len(matrix[sr])

        TestArakoon.logger.info('cluster_ids: {0}'.format(node_ids))
        TestArakoon.logger.info('matrix: {0}'.format(matrix))

        for config_file in matrix[storagerouters[0]].keys():
            TestArakoon.validate_arakoon_config_content(config_file, node_ids)

        assert len(storagerouters) == len(matrix.keys()), "not all nodes have arakoon configs"
        incorrect_nodes = list()
        for sr in matrix:
            is_master = sr.node_type == 'MASTER'
            if (is_master is True and len(matrix[sr]) != nr_of_configs_on_master) or\
                    (is_master is False and len(matrix[sr]) != nr_of_configs_on_extra):
                incorrect_nodes.append(sr.ip)
        assert len(incorrect_nodes) == 0, "Incorrect nr of configs on nodes: {0}".format(incorrect_nodes)

        md5sum_matrix = dict()
        incorrect_configs = list()
        for cfg in matrix[storagerouters[0]]:
            for sr in storagerouters:
                if cfg not in md5sum_matrix:
                    md5sum_matrix[cfg] = matrix[sr][cfg]
                elif matrix[sr][cfg] != md5sum_matrix[cfg]:
                    incorrect_configs.append("Incorrect contents {0} for {1} on {2}, expected {3}"
                                             .format(matrix[sr][cfg], sr.ip, cfg, md5sum_matrix[cfg]))

        assert len(incorrect_configs) == 0,\
            'Incorrect arakoon config contents: \n{0}'.format('\n'.join(incorrect_configs))
Exemplo n.º 27
0
    def validate_alba_backend_sanity_without_claimed_disks(alba_backend):
        """
        Validate whether the ALBA backend is configured correctly
        :param alba_backend: ALBA backend
        :return: None
        """
        # Attribute validation
        assert alba_backend.available is True,\
            'ALBA backend {0} is not available'.format(alba_backend.backend.name)
        assert len(alba_backend.presets) >= 1,\
            'No preset found for ALBA backend {0}'.format(alba_backend.backend.name)
        assert len([default for default in alba_backend.presets if default['is_default'] is True]) == 1,\
            'Could not find default preset for backend {0}'.format(alba_backend.backend.name)
        assert alba_backend.backend.backend_type.code == 'alba',\
            'Backend type for ALBA backend is {0}'.format(alba_backend.backend.backend_type.code)
        assert alba_backend.backend.status == 'RUNNING',\
            'Status for ALBA backend is {0}'.format(alba_backend.backend.status)

        # Validate ABM and NSM services
        storagerouters = GeneralStorageRouter.get_storage_routers()
        storagerouters_with_db_role = [sr for sr in storagerouters if GeneralStorageRouter.has_roles(storagerouter=sr, roles='DB') is True and sr.node_type == 'MASTER']

        assert len(alba_backend.abm_services) == len(storagerouters_with_db_role),\
            'Not enough ABM services found'
        assert len(alba_backend.nsm_services) == len(storagerouters_with_db_role),\
            'Not enough NSM services found'

        # Validate ALBA backend configuration structure
        alba_backend_key = '/ovs/alba/backends'
        assert Configuration.dir_exists(key=alba_backend_key) is True,\
            'Configuration does not contain key {0}'.format(alba_backend_key)

        actual_config_keys = [key for key in Configuration.list(alba_backend_key)]
        expected_config_keys = ['global_gui_error_interval', alba_backend.guid, 'default_nsm_hosts']
        optional_config_keys = ['verification_factor']

        expected_keys_amount = 0
        for optional_key in optional_config_keys:
            if optional_key in actual_config_keys:
                expected_keys_amount += 1

        for expected_key in expected_config_keys:
            if not re.match(Toolbox.regex_guid, expected_key):
                expected_keys_amount += 1
            assert expected_key in actual_config_keys,\
                'Key {0} was not found in tree {1}'.format(expected_key, alba_backend_key)

        for actual_key in list(actual_config_keys):
            if re.match(Toolbox.regex_guid, actual_key):
                actual_config_keys.remove(actual_key)  # Remove all alba backend keys
        assert len(actual_config_keys) == expected_keys_amount,\
            'Another key was added to the {0} tree'.format(alba_backend_key)

        this_alba_backend_key = '{0}/{1}'.format(alba_backend_key, alba_backend.guid)
        actual_keys = [key for key in Configuration.list(this_alba_backend_key)]
        expected_keys = ['maintenance']
        assert actual_keys == expected_keys,\
            'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)

        maintenance_key = '{0}/maintenance'.format(this_alba_backend_key)
        actual_keys = [key for key in Configuration.list(maintenance_key)]
        expected_keys = ['nr_of_agents', 'config']
        assert set(actual_keys) == set(expected_keys),\
            'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)
        # @TODO: Add validation for config values

        # Validate ASD node configuration structure
        alba_nodes = GeneralAlba.get_alba_nodes()
        assert len(alba_nodes) > 0,\
            'Could not find any ALBA nodes in the model'
        alba_node_key = '/ovs/alba/asdnodes'
        actual_keys = [key for key in Configuration.list(alba_node_key)]
        assert len(alba_nodes) == len(actual_keys),\
            'Amount of ALBA nodes in model: {0} >< amount of ALBA nodes in configuration: {1}.'.format(len(alba_nodes),
                                                                                                       len(actual_keys))
        for alba_node in alba_nodes:
            assert alba_node.node_id in actual_keys,\
                'ALBA node with ID {0} not present in configuration'.format(alba_node.node_id)

            actual_asdnode_keys = [key for key in Configuration.list('{0}/{1}'.format(alba_node_key, alba_node.node_id))]
            expected_asdnode_keys = ['config', 'services']
            assert actual_asdnode_keys == expected_asdnode_keys,\
                'Actual keys: {0} - Expected keys: {1}'.format(actual_asdnode_keys, expected_asdnode_keys)

            actual_config_keys = [key for key in Configuration.list('{0}/{1}/config'.format(alba_node_key, alba_node.node_id))]
            expected_config_keys = ['main', 'network']
            assert set(actual_config_keys) == set(expected_config_keys),\
                'Actual keys: {0} - Expected keys: {1}'.format(actual_config_keys, expected_config_keys)
            # @TODO: Add validation for main and network values

        # Validate Arakoon configuration structure
        arakoon_abm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.abm_services[0].service.name).replace('arakoon-', '')
        arakoon_nsm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.nsm_services[0].service.name).replace('arakoon-', '')
        assert Configuration.exists(key=arakoon_abm_key, raw=True) is True,\
            'Configuration key {0} does not exist'.format(arakoon_abm_key)
        assert Configuration.exists(key=arakoon_nsm_key, raw=True) is True,\
            'Configuration key {0} does not exist'.format(arakoon_nsm_key)
        # @TODO: Add validation for config values

        # Validate maintenance agents
        actual_amount_agents = len([service for node_services in [alba_node.client.list_maintenance_services() for alba_node in alba_nodes] for service in node_services])
        expected_amount_agents = 1
        assert actual_amount_agents == expected_amount_agents,\
            'Amount of maintenance agents is incorrect. Found {0} - Expected {1}'.format(actual_amount_agents,
                                                                                         expected_amount_agents)

        # Validate arakoon services
        machine_ids = [sr.machine_id for sr in storagerouters_with_db_role]
        abm_service_name = alba_backend.abm_services[0].service.name
        nsm_service_name = alba_backend.nsm_services[0].service.name
        for storagerouter in storagerouters_with_db_role:
            root_client = SSHClient(endpoint=storagerouter, username='******')
            for service_name in [abm_service_name, nsm_service_name]:
                assert GeneralService.has_service(name=service_name, client=root_client) is True,\
                    'Service {0} not deployed on Storage Router {1}'.format(service_name, storagerouter.name)
                exitcode, output = GeneralService.get_service_status(name=service_name, client=root_client)
                assert exitcode is True,\
                    'Service {0} not running on Storage Router {1} - {2}'.format(service_name, storagerouter.name,
                                                                                 output)
                out, err, _ = General.execute_command('arakoon --who-master -config {0}'.format(Configuration.get_configuration_path('/ovs/arakoon/{0}/config'.format(abm_service_name.replace('arakoon-', '')))))
                assert out.strip() in machine_ids,\
                    'Arakoon master is {0}, but should be 1 of "{1}"'.format(out.strip(), ', '.join(machine_ids))
Exemplo n.º 28
0
    def list(self, discover=False, ip=None, node_id=None):
        """
        Lists all available ALBA Nodes
        :param discover: If True and IP provided, return list of single ALBA node, If True and no IP provided, return all ALBA nodes else return modeled ALBA nodes
        :type discover: bool
        :param ip: IP of ALBA node to retrieve
        :type ip: str
        :param node_id: ID of the ALBA node
        :type node_id: str
        """
        if discover is False and (ip is not None or node_id is not None):
            raise HttpNotAcceptableException(error_description='Discover is mutually exclusive with IP and nodeID',
                                             error='invalid_data')
        if (ip is None and node_id is not None) or (ip is not None and node_id is None):
            raise HttpNotAcceptableException(error_description='Both IP and nodeID need to be specified',
                                             error='invalid_data')

        if discover is False:
            return AlbaNodeList.get_albanodes()

        if ip is not None:
            node = AlbaNode(volatile=True)
            node.ip = ip
            node.type = 'ASD'
            node.node_id = node_id
            node.port = Configuration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(node_id))
            node.username = Configuration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(node_id))
            node.password = Configuration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(node_id))
            data = node.client.get_metadata()
            if data['_success'] is False and data['_error'] == 'Invalid credentials':
                raise HttpNotAcceptableException(error_description='Invalid credentials',
                                                 error='invalid_data')
            if data['node_id'] != node_id:
                raise HttpNotAcceptableException(error_description='Unexpected node identifier. {0} vs {1}'.format(data['node_id'], node_id),
                                                 error='invalid_data')
            node_list = DataList(AlbaNode, {})
            node_list._executed = True
            node_list._guids = [node.guid]
            node_list._objects = {node.guid: node}
            node_list._data = {node.guid: {'guid': node.guid, 'data': node._data}}
            return node_list

        nodes = {}
        model_node_ids = [node.node_id for node in AlbaNodeList.get_albanodes()]
        found_node_ids = []
        asd_node_ids = []
        if Configuration.dir_exists('/ovs/alba/asdnodes'):
            asd_node_ids = Configuration.list('/ovs/alba/asdnodes')

        for node_id in asd_node_ids:
            node = AlbaNode(volatile=True)
            node.type = 'ASD'
            node.node_id = node_id
            node.ip = Configuration.get('/ovs/alba/asdnodes/{0}/config/main|ip'.format(node_id))
            node.port = Configuration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(node_id))
            node.username = Configuration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(node_id))
            node.password = Configuration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(node_id))
            if node.node_id not in model_node_ids and node.node_id not in found_node_ids:
                nodes[node.guid] = node
                found_node_ids.append(node.node_id)
        node_list = DataList(AlbaNode, {})
        node_list._executed = True
        node_list._guids = nodes.keys()
        node_list._objects = nodes
        node_list._data = dict([(node.guid, {'guid': node.guid, 'data': node._data}) for node in nodes.values()])
        return node_list
Exemplo n.º 29
0
    def services_running(self):
        # type: () -> bool
        """
        Check if all services are running
        :return: Boolean
        """
        try:
            key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
            value = str(time.time())
            if self.target in [WatcherTypes.CONFIG, WatcherTypes.FWK]:
                self.log_message('Testing configuration store...')
                try:
                    Configuration.list('/')
                except Exception as ex:
                    self.log_message('  Error during configuration store test: {0}'.format(ex), 2)
                    return False

                with open(CACC_LOCATION) as config_file:
                    contents = config_file.read()
                config = ArakoonClusterConfig(cluster_id=ARAKOON_NAME, load_config=False)
                config.read_config(contents=contents)
                client = ArakoonInstaller.build_client(config)
                contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY, consistency=NoGuarantee())
                if Watcher.LOG_CONTENTS != contents:
                    try:
                        config.read_config(contents=contents)  # Validate whether the contents are not corrupt
                    except Exception as ex:
                        self.log_message('  Configuration stored in configuration store seems to be corrupt: {0}'.format(ex), 2)
                        return False
                    temp_filename = '{0}~'.format(CACC_LOCATION)
                    with open(temp_filename, 'w') as config_file:
                        config_file.write(contents)
                        config_file.flush()
                        os.fsync(config_file)
                    os.rename(temp_filename, CACC_LOCATION)
                    Watcher.LOG_CONTENTS = contents
                self.log_message('  Configuration store OK', 0)

            if self.target == WatcherTypes.FWK:
                self._test_store('volatile', key, value)
                self._test_store('persistent')

            if self.target == WatcherTypes.VOLDRV:
                # Arakoon, voldrv cluster
                self._test_store('arakoon_voldrv')

            if self.target in [WatcherTypes.FWK, WatcherTypes.VOLDRV]:
                # RabbitMQ
                self.log_message('Test rabbitMQ...', 0)
                messagequeue = Configuration.get('/ovs/framework/messagequeue')
                rmq_servers = messagequeue['endpoints']
                good_node = False
                for server in rmq_servers:
                    try:
                        connection_string = '{0}://{1}:{2}@{3}/%2F'.format(messagequeue['protocol'],
                                                                           messagequeue['user'],
                                                                           messagequeue['password'],
                                                                           server)
                        connection = pika.BlockingConnection(pika.URLParameters(connection_string))
                        channel = connection.channel()
                        channel.basic_publish('', 'ovs-watcher', str(time.time()),
                                              pika.BasicProperties(content_type='text/plain', delivery_mode=1))
                        connection.close()
                        good_node = True
                    except Exception as message:
                        self.log_message('  Error during rabbitMQ test on node {0}: {1}'.format(server, message), 2)
                if good_node is False:
                    self.log_message('  No working rabbitMQ node could be found', 2)
                    return False
                self.log_message('  RabbitMQ test OK')
                self.log_message('All tests OK')

            return True
        except Exception as ex:
            self.log_message('Unexpected exception: {0}'.format(ex), 2)
            return False
Exemplo n.º 30
0
    def migrate(previous_version):
        """
        Migrates from a given version to the current version. It uses 'previous_version' to be smart
        wherever possible, but the code should be able to migrate any version towards the expected version.
        When this is not possible, the code can set a minimum version and raise when it is not met.
        :param previous_version: The previous version from which to start the migration
        :type previous_version: float
        """

        working_version = previous_version

        if working_version == 0:
            from ovs.dal.hybrids.servicetype import ServiceType
            # Initial version:
            # * Add any basic configuration or model entries

            # Add backends
            for backend_type_info in [('ALBA', 'alba')]:
                code = backend_type_info[1]
                backend_type = BackendTypeList.get_backend_type_by_code(code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = backend_type_info[0]
                backend_type.code = code
                backend_type.save()

            # Add service types
            for service_type_info in [
                    ServiceType.SERVICE_TYPES.NS_MGR,
                    ServiceType.SERVICE_TYPES.ALBA_MGR,
                    ServiceType.SERVICE_TYPES.ALBA_S3_TRANSACTION
            ]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

        # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
        elif working_version < DALMigrator.THIS_VERSION:
            import hashlib
            from ovs.dal.exceptions import ObjectNotFoundException
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.dal.hybrids.albaabmcluster import ABMCluster
            from ovs.dal.hybrids.albaosd import AlbaOSD
            from ovs.dal.hybrids.albansmcluster import NSMCluster
            from ovs.dal.hybrids.j_abmservice import ABMService
            from ovs.dal.hybrids.j_nsmservice import NSMService
            from ovs.dal.hybrids.service import Service
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.lists.albabackendlist import AlbaBackendList
            from ovs.dal.lists.albanodelist import AlbaNodeList
            from ovs.dal.lists.servicetypelist import ServiceTypeList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig, ArakoonInstaller
            from ovs.extensions.generic.configuration import Configuration, NotFoundException
            from ovs_extensions.generic.toolbox import ExtensionsToolbox
            from ovs.extensions.plugins.albacli import AlbaCLI
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            # Migrate unique constraints & indexes
            client = PersistentFactory.get_client()
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                classname = cls.__name__.lower()
                unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname)
                index_prefix = 'ovs_index_{0}|{{0}}|'.format(classname)
                index_key = 'ovs_index_{0}|{{0}}|{{1}}'.format(classname)
                uniques = []
                indexes = []
                # noinspection PyProtectedMember
                for prop in cls._properties:
                    if prop.unique is True and len([
                            k for k in client.prefix(
                                unique_key.format(prop.name))
                    ]) == 0:
                        uniques.append(prop.name)
                    if prop.indexed is True and len([
                            k for k in client.prefix(
                                index_prefix.format(prop.name))
                    ]) == 0:
                        indexes.append(prop.name)
                if len(uniques) > 0 or len(indexes) > 0:
                    prefix = 'ovs_data_{0}_'.format(classname)
                    for key, data in client.prefix_entries(prefix):
                        for property_name in uniques:
                            ukey = '{0}{1}'.format(
                                unique_key.format(property_name),
                                hashlib.sha1(str(
                                    data[property_name])).hexdigest())
                            client.set(ukey, key)
                        for property_name in indexes:
                            if property_name not in data:
                                continue  # This is the case when there's a new indexed property added.
                            ikey = index_key.format(
                                property_name,
                                hashlib.sha1(str(
                                    data[property_name])).hexdigest())
                            index = list(
                                client.get_multi([ikey], must_exist=False))[0]
                            transaction = client.begin_transaction()
                            if index is None:
                                client.assert_value(ikey,
                                                    None,
                                                    transaction=transaction)
                                client.set(ikey, [key],
                                           transaction=transaction)
                            elif key not in index:
                                client.assert_value(ikey,
                                                    index[:],
                                                    transaction=transaction)
                                client.set(ikey,
                                           index + [key],
                                           transaction=transaction)
                            client.apply_transaction(transaction)

            #############################################
            # Introduction of ABMCluster and NSMCluster #
            #############################################
            # Verify presence of unchanged ALBA Backends
            alba_backends = AlbaBackendList.get_albabackends()
            changes_required = False
            for alba_backend in alba_backends:
                if alba_backend.abm_cluster is None or len(
                        alba_backend.nsm_clusters) == 0:
                    changes_required = True
                    break

            if changes_required:
                # Retrieve ABM and NSM clusters
                abm_cluster_info = []
                nsm_cluster_info = []
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    try:
                        metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
                            cluster_name=cluster_name)
                        if metadata[
                                'cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                            abm_cluster_info.append(metadata)
                        elif metadata[
                                'cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                            nsm_cluster_info.append(metadata)
                    except NotFoundException:
                        continue

                # Retrieve NSM Arakoon cluster information
                cluster_arakoon_map = {}
                for cluster_info in abm_cluster_info + nsm_cluster_info:
                    cluster_name = cluster_info['cluster_name']
                    arakoon_config = ArakoonClusterConfig(
                        cluster_id=cluster_name)
                    cluster_arakoon_map[
                        cluster_name] = arakoon_config.export_dict()

                storagerouter_map = dict(
                    (storagerouter.machine_id, storagerouter) for storagerouter
                    in StorageRouterList.get_storagerouters())
                alba_backend_id_map = dict((alba_backend.alba_id, alba_backend)
                                           for alba_backend in alba_backends)
                for cluster_info in abm_cluster_info:
                    internal = cluster_info['internal']
                    cluster_name = cluster_info['cluster_name']
                    config_location = Configuration.get_configuration_path(
                        key=ArakoonClusterConfig.CONFIG_KEY.format(
                            cluster_name))
                    try:
                        alba_id = AlbaCLI.run(command='get-alba-id',
                                              config=config_location,
                                              named_params={'attempts':
                                                            3})['id']
                        nsm_hosts = AlbaCLI.run(command='list-nsm-hosts',
                                                config=config_location,
                                                named_params={'attempts': 3})
                    except RuntimeError:
                        continue

                    alba_backend = alba_backend_id_map.get(alba_id)
                    if alba_backend is None:  # ALBA Backend with ID not found in model
                        continue
                    if alba_backend.abm_cluster is not None and len(
                            alba_backend.nsm_clusters
                    ) > 0:  # Clusters already exist
                        continue

                    # Create ABM Cluster
                    if alba_backend.abm_cluster is None:
                        abm_cluster = ABMCluster()
                        abm_cluster.name = cluster_name
                        abm_cluster.alba_backend = alba_backend
                        abm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(
                            cluster_name)
                        abm_cluster.save()
                    else:
                        abm_cluster = alba_backend.abm_cluster

                    # Create ABM Services
                    abm_arakoon_config = cluster_arakoon_map[cluster_name]
                    abm_arakoon_config.pop('global')
                    arakoon_nodes = abm_arakoon_config.keys()
                    if internal is False:
                        services_to_create = 1
                    else:
                        if set(arakoon_nodes).difference(
                                set(storagerouter_map.keys())):
                            continue
                        services_to_create = len(arakoon_nodes)
                    for index in range(services_to_create):
                        service = Service()
                        service.name = 'arakoon-{0}-abm'.format(
                            alba_backend.name)
                        service.type = ServiceTypeList.get_by_name(
                            ServiceType.SERVICE_TYPES.ALBA_MGR)
                        if internal is True:
                            arakoon_node_config = abm_arakoon_config[
                                arakoon_nodes[index]]
                            service.ports = [
                                arakoon_node_config['client_port'],
                                arakoon_node_config['messaging_port']
                            ]
                            service.storagerouter = storagerouter_map[
                                arakoon_nodes[index]]
                        else:
                            service.ports = []
                            service.storagerouter = None
                        service.save()

                        abm_service = ABMService()
                        abm_service.service = service
                        abm_service.abm_cluster = abm_cluster
                        abm_service.save()

                    # Create NSM Clusters
                    for cluster_index, nsm_host in enumerate(
                            sorted(nsm_hosts,
                                   key=lambda host: ExtensionsToolbox.
                                   advanced_sort(host['cluster_id'], '_'))):
                        nsm_cluster_name = nsm_host['cluster_id']
                        nsm_arakoon_config = cluster_arakoon_map.get(
                            nsm_cluster_name)
                        if nsm_arakoon_config is None:
                            continue

                        number = cluster_index if internal is False else int(
                            nsm_cluster_name.split('_')[-1])
                        nsm_cluster = NSMCluster()
                        nsm_cluster.name = nsm_cluster_name
                        nsm_cluster.number = number
                        nsm_cluster.alba_backend = alba_backend
                        nsm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(
                            nsm_cluster_name)
                        nsm_cluster.save()

                        # Create NSM Services
                        nsm_arakoon_config.pop('global')
                        arakoon_nodes = nsm_arakoon_config.keys()
                        if internal is False:
                            services_to_create = 1
                        else:
                            if set(arakoon_nodes).difference(
                                    set(storagerouter_map.keys())):
                                continue
                            services_to_create = len(arakoon_nodes)
                        for service_index in range(services_to_create):
                            service = Service()
                            service.name = 'arakoon-{0}-nsm_{1}'.format(
                                alba_backend.name, number)
                            service.type = ServiceTypeList.get_by_name(
                                ServiceType.SERVICE_TYPES.NS_MGR)
                            if internal is True:
                                arakoon_node_config = nsm_arakoon_config[
                                    arakoon_nodes[service_index]]
                                service.ports = [
                                    arakoon_node_config['client_port'],
                                    arakoon_node_config['messaging_port']
                                ]
                                service.storagerouter = storagerouter_map[
                                    arakoon_nodes[service_index]]
                            else:
                                service.ports = []
                                service.storagerouter = None
                            service.save()

                            nsm_service = NSMService()
                            nsm_service.service = service
                            nsm_service.nsm_cluster = nsm_cluster
                            nsm_service.save()

            # Clean up all junction services no longer linked to an ALBA Backend
            all_nsm_services = [
                service.nsm_service for service in ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.NS_MGR).services
                if service.nsm_service.nsm_cluster is None
            ]
            all_abm_services = [
                service.abm_service for service in ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ALBA_MGR).services
                if service.abm_service.abm_cluster is None
            ]
            for abm_service in all_abm_services:
                abm_service.delete()
                abm_service.service.delete()
            for nsm_service in all_nsm_services:
                nsm_service.delete()
                nsm_service.service.delete()

            ################################
            # Introduction of Active Drive #
            ################################
            # Update slot_id and Alba Node relation for all OSDs
            client = PersistentFactory.get_client()
            disk_osd_map = {}
            for key, data in client.prefix_entries('ovs_data_albaosd_'):
                alba_disk_guid = data.get('alba_disk', {}).get('guid')
                if alba_disk_guid is not None:
                    if alba_disk_guid not in disk_osd_map:
                        disk_osd_map[alba_disk_guid] = []
                    disk_osd_map[alba_disk_guid].append(
                        key.replace('ovs_data_albaosd_', ''))
                try:
                    value = client.get(key)
                    value.pop('alba_disk', None)
                    client.set(key=key, value=value)
                except Exception:
                    pass  # We don't care if we would have any leftover AlbaDisk information in _data, but its cleaner not to

            alba_guid_node_map = dict(
                (an.guid, an) for an in AlbaNodeList.get_albanodes())
            for key, data in client.prefix_entries('ovs_data_albadisk_'):
                alba_disk_guid = key.replace('ovs_data_albadisk_', '')
                alba_node_guid = data.get('alba_node', {}).get('guid')
                if alba_disk_guid in disk_osd_map and alba_node_guid in alba_guid_node_map and len(
                        data.get('aliases', [])) > 0:
                    slot_id = data['aliases'][0].split('/')[-1]
                    for osd_guid in disk_osd_map[alba_disk_guid]:
                        try:
                            osd = AlbaOSD(osd_guid)
                        except ObjectNotFoundException:
                            continue
                        osd.slot_id = slot_id
                        osd.alba_node = alba_guid_node_map[alba_node_guid]
                        osd.save()
                client.delete(key=key, must_exist=False)

            # Remove unique constraints for AlbaNode IP
            for key in client.prefix('ovs_unique_albanode_ip_'):
                client.delete(key=key, must_exist=False)

            # Remove relation for all Alba Disks
            for key in client.prefix('ovs_reverseindex_albadisk_'):
                client.delete(key=key, must_exist=False)

            # Remove the relation between AlbaNode and AlbaDisk
            for key in client.prefix('ovs_reverseindex_albanode_'):
                if '|disks|' in key:
                    client.delete(key=key, must_exist=False)

        return DALMigrator.THIS_VERSION
Exemplo n.º 31
0
    def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
        """
        Promotes or demotes the local node
        :param node_action: Demote or promote
        :type node_action: str
        :param cluster_ip: IP of node to promote or demote
        :type cluster_ip: str
        :param execute_rollback: In case of failure revert the changes made
        :type execute_rollback: bool
        :return: None
        """

        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        Toolbox.log(logger=NodeTypeController._logger, messages='Open vStorage Setup - {0}'.format(node_action.capitalize()), boxed=True)
        try:
            Toolbox.log(logger=NodeTypeController._logger, messages='Collecting information', title=True)

            machine_id = System.get_my_machine_id()
            if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id)) is False:
                raise RuntimeError('No local OVS setup found.')

            if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
                raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))

            if cluster_ip:
                client = SSHClient(endpoint=cluster_ip)
                machine_id = System.get_my_machine_id(client)

            node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_action == 'promote' and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            elif node_action == 'demote' and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')
            elif node_type not in ['MASTER', 'EXTRA']:
                raise RuntimeError('This node is not correctly configured.')

            master_ip = None
            offline_nodes = []

            online = True
            target_client = None
            if node_action == 'demote' and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                ip_client_map = {}
                for storage_router in StorageRouterList.get_storagerouters():
                    try:
                        client = SSHClient(storage_router.ip, username='******')
                        if storage_router.node_type == 'MASTER':
                            master_ip = storage_router.ip
                        ip_client_map[storage_router.ip] = client
                    except UnableToConnectException:
                        if storage_router.ip == cluster_ip:
                            online = False
                            unique_id = storage_router.machine_id
                            StorageDriverController.mark_offline(storagerouter_guid=storage_router.guid)
                        offline_nodes.append(storage_router)
                if online is True:
                    raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
                if master_ip is None:
                    raise RuntimeError('Failed to retrieve another responsive MASTER node')

            else:
                target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=NodeTypeController._logger)
                target_client = SSHClient('127.0.0.1', username='******', password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
                node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues()]
                master_node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues() if sr_info['type'] == 'master' and sr_info['ip'] != ip]
                if len(master_node_ips) == 0:
                    if node_action == 'promote':
                        raise RuntimeError('No master node could be found')
                    else:
                        raise RuntimeError('It is not possible to remove the only master')

                master_ip = master_node_ips[0]
                ip_client_map = dict((node_ip, SSHClient(node_ip, username='******')) for node_ip in node_ips)

            if node_action == 'demote':
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_name, False)
                    config.load_config()
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if len(config.nodes) == 1 and config.nodes[0].ip == ip and metadata.get('internal') is True:
                        raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')

            configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
            if node_action == 'promote':
                try:
                    NodeTypeController.promote_node(cluster_ip=ip,
                                                    master_ip=master_ip,
                                                    ip_client_map=ip_client_map,
                                                    unique_id=unique_id,
                                                    configure_memcached=configure_memcached,
                                                    configure_rabbitmq=configure_rabbitmq)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.demote_node(cluster_ip=ip,
                                                       master_ip=master_ip,
                                                       ip_client_map=ip_client_map,
                                                       unique_id=unique_id,
                                                       unconfigure_memcached=configure_memcached,
                                                       unconfigure_rabbitmq=configure_rabbitmq,
                                                       offline_nodes=offline_nodes)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'demote')
                    raise
            else:
                try:
                    NodeTypeController.demote_node(cluster_ip=ip,
                                                   master_ip=master_ip,
                                                   ip_client_map=ip_client_map,
                                                   unique_id=unique_id,
                                                   unconfigure_memcached=configure_memcached,
                                                   unconfigure_rabbitmq=configure_rabbitmq,
                                                   offline_nodes=offline_nodes)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.promote_node(cluster_ip=ip,
                                                        master_ip=master_ip,
                                                        ip_client_map=ip_client_map,
                                                        unique_id=unique_id,
                                                        configure_memcached=configure_memcached,
                                                        configure_rabbitmq=configure_rabbitmq)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'promote')
                    raise

            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger, messages='{0} complete.'.format(node_action.capitalize()), boxed=True)
        except Exception as exception:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                        boxed=True,
                        loglevel='error')
            sys.exit(1)
Exemplo n.º 32
0
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        * Successfully finishing a piece of migration code, should create an entry in /ovs/framework/migration in case it should not be executed again
        *     Eg: /ovs/framework/migration|stats_monkey_integration: True
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.db.arakooninstaller import ArakoonInstaller
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator
        from ovs.extensions.packages.packagefactory import PackageFactory
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip,  # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='arakoon',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name),
                                                    new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name))

                # Register new service and remove old service
                service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']:
                            proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info
                            Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config)

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
                if service_manager.__class__ == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])

        ########################################
        # Update metadata_store_bits information
        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        #####################################
        # Update the vPool metadata structure
        def _update_metadata_structure(metadata):
            metadata = copy.deepcopy(metadata)
            cache_structure = {'read': False,
                               'write': False,
                               'is_backend': False,
                               'quota': None,
                               'backend_info': {'name': None,  # Will be filled in when is_backend is true
                                                'backend_guid': None,
                                                'alba_backend_guid': None,
                                                'policies': None,
                                                'preset': None,
                                                'arakoon_config': None,
                                                'connection_info': {'client_id': None,
                                                                    'client_secret': None,
                                                                    'host': None,
                                                                    'port': None,
                                                                    'local': None}}
                               }
            structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read',
                                                                      'write': 'block_cache_on_write',
                                                                      'quota': 'quota_bc',
                                                                      'backend_prefix': 'backend_bc_{0}'},
                             StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read',
                                                                         'write': 'fragment_cache_on_write',
                                                                         'quota': 'quota_fc',
                                                                         'backend_prefix': 'backend_aa_{0}'}}
            if 'arakoon_config' in metadata['backend']:  # Arakoon config should be placed under the backend info
                metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config')
            if 'connection_info' in metadata['backend']:  # Connection info sohuld be placed under the backend info
                metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info')
            if 'caching_info' not in metadata:  # Caching info is the new key
                would_be_caching_info = {}
                metadata['caching_info'] = would_be_caching_info
                # Extract all caching data for every storagerouter
                current_caching_info = metadata['backend'].pop('caching_info')  # Pop to mutate metadata
                for storagerouter_guid in current_caching_info.iterkeys():
                    current_cache_data = current_caching_info[storagerouter_guid]
                    storagerouter_caching_info = {}
                    would_be_caching_info[storagerouter_guid] = storagerouter_caching_info
                    for cache_type, cache_type_mapping in structure_map.iteritems():
                        new_cache_structure = copy.deepcopy(cache_structure)
                        storagerouter_caching_info[cache_type] = new_cache_structure
                        for new_structure_key, old_structure_key in cache_type_mapping.iteritems():
                            if new_structure_key == 'backend_prefix':
                                # Get possible backend related info
                                metadata_key = old_structure_key.format(storagerouter_guid)
                                if metadata_key not in metadata:
                                    continue
                                backend_data = metadata.pop(metadata_key)  # Pop to mutate metadata
                                new_cache_structure['is_backend'] = True
                                # Copy over the old data
                                new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config']
                                new_cache_structure['backend_info'].update(backend_data['backend_info'])
                                new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info'])
                            else:
                                new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key)
            return metadata

        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            try:
                new_metadata = _update_metadata_structure(vpool.metadata)
                vpool.metadata = new_metadata
                vpool.save()
            except KeyError:
                MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name))

        ##############################################
        # Always use indent=4 during Configuration set
        def _resave_all_config_entries(config_path='/ovs'):
            """
            Recursive functions which checks every config management key if its a directory or not.
            If not a directory, we retrieve the config and just save it again using the new indentation logic
            """
            for item in Configuration.list(config_path):
                new_path = config_path + '/' + item
                print new_path
                if Configuration.dir_exists(new_path) is True:
                    _resave_all_config_entries(config_path=new_path)
                else:
                    try:
                        _config = Configuration.get(new_path)
                        Configuration.set(new_path, _config)
                    except:
                        _config = Configuration.get(new_path, raw=True)
                        Configuration.set(new_path, _config, raw=True)
        if ExtensionMigrator.THIS_VERSION <= 13:  # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower
            MigrationController._logger.info('Re-saving every configuration setting with new indentation rules')
            _resave_all_config_entries()

        ############################
        # Update some default values
        def _update_manifest_cache_size(_proxy_config_key):
            updated = False
            manifest_cache_size = 500 * 1024 * 1024
            if Configuration.exists(key=_proxy_config_key):
                _proxy_config = Configuration.get(key=_proxy_config_key)
                for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                    if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                        if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                            updated = True
                            _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
                if _proxy_config['manifest_cache_size'] != manifest_cache_size:
                    updated = True
                    _proxy_config['manifest_cache_size'] = manifest_cache_size

                if updated is True:
                    Configuration.set(key=_proxy_config_key, value=_proxy_config)
            return updated

        for storagedriver in StorageDriverList.get_storagedrivers():
            try:
                vpool = storagedriver.vpool
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))  # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services
                for alba_proxy in storagedriver.alba_proxies:
                    if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True:
                        # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                        ExtensionsToolbox.edit_version_file(client=root_client,
                                                            package_name='alba',
                                                            old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name))

                # Update 'backend_connection_manager' section
                changes = False
                storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
                if 'backend_connection_manager' not in storagedriver_config.configuration:
                    continue

                current_config = storagedriver_config.configuration['backend_connection_manager']
                for key, value in current_config.iteritems():
                    if key.isdigit() is True:
                        if value.get('alba_connection_asd_connection_pool_capacity') != 10:
                            changes = True
                            value['alba_connection_asd_connection_pool_capacity'] = 10
                        if value.get('alba_connection_timeout') != 30:
                            changes = True
                            value['alba_connection_timeout'] = 30
                        if value.get('alba_connection_rora_manifest_cache_capacity') != 25000:
                            changes = True
                            value['alba_connection_rora_manifest_cache_capacity'] = 25000

                if changes is True:
                    storagedriver_config.clear_backend_connection_manager()
                    storagedriver_config.configure_backend_connection_manager(**current_config)
                    storagedriver_config.save(root_client)

                    # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='volumedriver',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
            except Exception:
                MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id))

        ####################################################
        # Adding proxy fail fast as env variable for proxies
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-albaproxy_'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'Environment=ALBA_FAIL_FAST=true'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'env ALBA_FAIL_FAST=true'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='alba',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        ######################################
        # Integration of stats monkey (2.10.2)
        if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False:
            try:
                # Get content of old key into new key
                old_stats_monkey_key = '/statsmonkey/statsmonkey'
                if Configuration.exists(key=old_stats_monkey_key) is True:
                    Configuration.set(key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key))
                    Configuration.delete(key=old_stats_monkey_key)

                # Make sure to disable the stats monkey by default or take over the current schedule if it was configured manually before
                celery_key = '/ovs/framework/scheduling/celery'
                current_value = None
                scheduling_config = Configuration.get(key=celery_key, default={})
                if 'statsmonkey.run_all_stats' in scheduling_config:  # Old celery task name of the stats monkey
                    current_value = scheduling_config.pop('statsmonkey.run_all_stats')
                scheduling_config['ovs.stats_monkey.run_all'] = current_value
                scheduling_config['alba.stats_monkey.run_all'] = current_value
                Configuration.set(key=celery_key, value=scheduling_config)

                support_key = '/ovs/framework/support'
                support_config = Configuration.get(key=support_key)
                support_config['support_agent'] = support_config.pop('enabled', True)
                support_config['remote_access'] = support_config.pop('enablesupport', False)
                Configuration.set(key=support_key, value=support_config)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True)
            except Exception:
                MigrationController._logger.exception('Integration of stats monkey failed')

        ######################################################
        # Write away cluster ID to a file for back-up purposes
        try:
            cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None)
            with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file:
                config = json.load(config_file)
            if cluster_id is not None and config.get('cluster_id', None) is None:
                config['cluster_id'] = cluster_id
                with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file:
                    json.dump(config, config_file, indent=4)
        except Exception:
            MigrationController._logger.exception('Writing cluster id to a file failed.')

        #########################################################
        # Additional string formatting in Arakoon services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False:
                arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon')]
                for storagerouter in StorageRouterList.get_masters():
                    for service_name in arakoon_service_names:
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON
                            config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON
                            Configuration.set(key=config_key, value=config)
                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        ############################################################
        # Additional string formatting in ALBA proxy services (2.11)
        changed_clients = set()
        try:
            if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False:
                alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
                for service in ServiceTypeList.get_by_name('AlbaProxy').services:
                    root_client = sr_client_map[service.storagerouter_guid]
                    config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name)
                    if Configuration.exists(key=config_key):
                        config = Configuration.get(key=config_key)
                        config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                        config['ALBA_PKG_NAME'] = alba_pkg_name
                        config['ALBA_VERSION_CMD'] = alba_version_cmd
                        Configuration.set(key=config_key, value=config)
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                           client=root_client,
                                                           target_name='ovs-{0}'.format(service.name))
                        changed_clients.add(root_client)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        ############################################################
        # Additional string formatting in DTL/VOLDRV services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False:
                sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for vpool in VPoolList.get_vpools():
                    for storagedriver in vpool.storagedrivers:
                        root_client = sr_client_map[storagedriver.storagerouter_guid]
                        for entry in ['dtl', 'volumedriver']:
                            service_name = '{0}_{1}'.format(entry, vpool.name)
                            service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD
                            config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name)
                            if Configuration.exists(key=config_key):
                                config = Configuration.get(key=config_key)
                                config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                                config['VOLDRV_PKG_NAME'] = sd_pkg_name
                                config['VOLDRV_VERSION_CMD'] = sd_version_cmd
                                Configuration.set(key=config_key, value=config)
                                service_manager.regenerate_service(name=service_template,
                                                                   client=root_client,
                                                                   target_name='ovs-{0}'.format(service_name))
                                changed_clients.add(root_client)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        #######################################################
        # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876)
        if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False:
            try:
                voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for storagerouter in StorageRouterList.get_storagerouters():
                    root_client = sr_client_map.get(storagerouter.guid)
                    if root_client is None:
                        continue

                    for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                        if not file_name.endswith('.version'):
                            continue
                        file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                        contents = root_client.file_read(filename=file_path)
                        regenerate = False
                        if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER:
                            if 'volumedriver-server' in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER)
                                root_client.file_write(filename=file_path, contents=contents)
                        elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE:
                            if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE)
                                contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE)
                                root_client.file_write(filename=file_path, contents=contents)

                        if regenerate is True:
                            service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD,
                                                               client=root_client,
                                                               target_name='ovs-{0}'.format(file_name.split('.')[0]))  # Leave out .version
                            changed_clients.add(root_client)
                Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True)
            except Exception:
                MigrationController._logger.exception('Updating actual package name for version files failed')

        for root_client in changed_clients:
            try:
                root_client.run(['systemctl', 'daemon-reload'])
            except Exception:
                MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

        #########################################################
        # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])
        #########################################################
        # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagerouter in StorageRouterList.get_storagerouters():
                root_client = sr_client_map[storagerouter.guid]
                for service_name in service_manager.list_services(client=root_client):
                    if not service_name.startswith('ovs-arakoon-'):
                        continue
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        MigrationController._logger.info('Finished out of band migrations')
Exemplo n.º 33
0
    def services_running(self, target):
        """
        Check all services are running
        :param target: Target to check
        :return: Boolean
        """
        try:
            key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
            value = str(time.time())

            if target == 'config':
                self.log_message(target, 'Testing configuration store...', 0)
                from ovs.extensions.generic.configuration import Configuration
                try:
                    Configuration.list('/')
                except Exception as ex:
                    self.log_message(target, '  Error during configuration store test: {0}'.format(ex), 2)
                    return False
                if Configuration.get_store() == 'arakoon':
                    from ovs.extensions.db.arakoon.configuration import ArakoonConfiguration
                    from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                    from ovs.extensions.db.arakoon.pyrakoon.pyrakoon.compat import NoGuarantee
                    with open(ArakoonConfiguration.CACC_LOCATION) as config_file:
                        contents = config_file.read()
                    config = ArakoonClusterConfig(cluster_id='cacc', filesystem=True)
                    config.read_config(contents)
                    client = ArakoonInstaller.build_client(config)
                    contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY, consistency=NoGuarantee())
                    if Watcher.LOG_CONTENTS != contents:
                        try:
                            config.read_config(contents)  # Validate whether the contents are not corrupt
                        except Exception as ex:
                            self.log_message(target, '  Configuration stored in configuration store seems to be corrupt: {0}'.format(ex), 2)
                            return False
                        temp_filename = '{0}~'.format(ArakoonConfiguration.CACC_LOCATION)
                        with open(temp_filename, 'w') as config_file:
                            config_file.write(contents)
                            config_file.flush()
                            os.fsync(config_file)
                        os.rename(temp_filename, ArakoonConfiguration.CACC_LOCATION)
                        Watcher.LOG_CONTENTS = contents
                self.log_message(target, '  Configuration store OK', 0)
                return True

            if target == 'framework':
                # Volatile
                self.log_message(target, 'Testing volatile store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            from ovs.extensions.storage.volatilefactory import VolatileFactory
                            VolatileFactory.store = None
                            volatile = VolatileFactory.get_client()
                            volatile.set(key, value)
                            if volatile.get(key) == value:
                                volatile.delete(key)
                                break
                            volatile.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(target, '  Error during volatile store test: {0}'.format(message), 2)
                    key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target, '  Volatile store not working correctly', 2)
                    return False
                self.log_message(target, '  Volatile store OK after {0} tries'.format(tries), 0)

                # Persistent
                self.log_message(target, 'Testing persistent store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            persistent = PersistentFactory.get_client()
                            persistent.set(key, value)
                            if persistent.get(key) == value:
                                persistent.delete(key)
                                break
                            persistent.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(target, '  Error during persistent store test: {0}'.format(message), 2)
                    key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target, '  Persistent store not working correctly', 2)
                    return False
                self.log_message(target, '  Persistent store OK after {0} tries'.format(tries), 0)

            if target == 'volumedriver':
                # Arakoon, voldrv cluster
                self.log_message(target, 'Testing arakoon (voldrv)...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        from ovs.extensions.generic.configuration import Configuration
                        from ovs.extensions.storage.persistent.pyrakoonstore import PyrakoonStore
                        cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
                        client = PyrakoonStore(cluster=cluster_name)
                        client.set(key, value)
                        if client.get(key) == value:
                            client.delete(key)
                            break
                        client.delete(key)
                    except Exception as message:
                        self.log_message(target, '  Error during arakoon (voldrv) test: {0}'.format(message), 2)
                    key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target, '  Arakoon (voldrv) not working correctly', 2)
                    return False
                self.log_message(target, '  Arakoon (voldrv) OK', 0)

            if target in ['framework', 'volumedriver']:
                # RabbitMQ
                self.log_message(target, 'Test rabbitMQ...', 0)
                import pika
                from ovs.extensions.generic.configuration import Configuration
                messagequeue = Configuration.get('/ovs/framework/messagequeue')
                rmq_servers = messagequeue['endpoints']
                good_node = False
                for server in rmq_servers:
                    try:
                        connection_string = '{0}://{1}:{2}@{3}/%2F'.format(messagequeue['protocol'],
                                                                           messagequeue['user'],
                                                                           messagequeue['password'],
                                                                           server)
                        connection = pika.BlockingConnection(pika.URLParameters(connection_string))
                        channel = connection.channel()
                        channel.basic_publish('', 'ovs-watcher', str(time.time()),
                                              pika.BasicProperties(content_type='text/plain', delivery_mode=1))
                        connection.close()
                        good_node = True
                    except Exception as message:
                        self.log_message(target, '  Error during rabbitMQ test on node {0}: {1}'.format(server, message), 2)
                if good_node is False:
                    self.log_message(target, '  No working rabbitMQ node could be found', 2)
                    return False
                self.log_message(target, '  RabbitMQ test OK', 0)
                self.log_message(target, 'All tests OK', 0)
                return True
        except Exception as ex:
            self.log_message(target, 'Unexpected exception: {0}'.format(ex), 2)
            return False