def setup(**kwargs):
        """
        Execute several actions before starting a new UnitTest
        :param kwargs: Additional keyword arguments
        :type kwargs: dict
        """
        DalHelper.setup(**kwargs)

        # noinspection PyProtectedMember
        ManagerClientMockup.clean_data()
        # noinspection PyProtectedMember
        VirtualAlbaBackend.clean_data()
        # noinspection PyProtectedMember
        AlbaController._add_base_configuration()
Example #2
    def claim_asds(alba_backend, nr_of_asds, disk_type=''):
        """
        Claim ASDs on the given ALBA Backend
        :param alba_backend: ALBA Backend to claim ASDs for
        :param nr_of_asds: Total amount of ASDs that should be claimed
        :param disk_type: Type of disks to initialise when additional ASDs are needed
        :return: True if enough ASDs were already claimed, None otherwise
        """
        def _wait_for_asd_count_with_status(_alba_backend, _nr_of_asds, status):
            counter = GeneralAlba.ALBA_TIMER / GeneralAlba.ALBA_TIMER_STEP
            asds_with_status = {}
            while counter > 0:
                GeneralAlba.logger.info('counter: {0}'.format(counter))
                _alba_backend.invalidate_dynamics(['local_stack'])
                for node_id in _alba_backend.local_stack:
                    for _disk in _alba_backend.local_stack[node_id].values():
                        for _osd_id, _asd in _disk['asds'].iteritems():
                            if _asd['status'] == status:
                                asds_with_status[_osd_id] = _disk.get('guid')
                GeneralAlba.logger.info('looking for {0} asds with status {1}: {2}'.format(_nr_of_asds, status, asds_with_status))
                if len(asds_with_status) >= _nr_of_asds:
                    break
                counter -= 1
                time.sleep(GeneralAlba.ALBA_TIMER_STEP)
            assert len(asds_with_status) >= _nr_of_asds,\
                "Unable to find {0} asds, only found {1} asds with status: {2}.\n".format(_nr_of_asds, len(asds_with_status), status)
            return asds_with_status

        claimed_asds = []
        alba_backend.invalidate_dynamics(['local_stack'])
        local_stack = alba_backend.local_stack
        for disks in local_stack.values():
            for disk in disks.values():
                for osd_id, asd in disk['asds'].iteritems():
                    if asd['status'] == 'claimed':
                        claimed_asds.append(osd_id)
        nr_asds_to_claim = nr_of_asds - len(claimed_asds)
        if nr_asds_to_claim <= 0:
            return True

        # @TODO: Initialize disks should be parameterized to be parallel or sequential with parallel being the default
        GeneralAlba.initialise_disks(alba_backend, nr_asds_to_claim, disk_type)

        claimable_asds = _wait_for_asd_count_with_status(alba_backend, nr_asds_to_claim, 'available')

        GeneralAlba.logger.info('osds: {0}'.format(claimable_asds))
        AlbaController.add_units(alba_backend.guid, claimable_asds)

        _wait_for_asd_count_with_status(alba_backend, nr_of_asds, 'claimed')
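
# A minimal, self-contained sketch (not part of the ALBA code base) of the poll-with-timeout
# pattern used by _wait_for_asd_count_with_status above: re-evaluate a condition every few
# seconds until it holds or the timeout expires. The names wait_until, timeout and step are
# illustrative assumptions.
import time

def wait_until(condition, timeout=300, step=10):
    """Poll `condition` every `step` seconds for at most `timeout` seconds; return its last value."""
    remaining = timeout
    while remaining > 0:
        if condition():
            return True
        remaining -= step
        time.sleep(step)
    return condition()

# Usage sketch (hypothetical helper): wait_until(lambda: len(get_available_asds()) >= 3)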
Example #3
 def move_slot(node_guid, slot_id, destination_node_guid):
     """
     Move a slot from one node to another. If the same disk can be accessed, all ASD ownership is moved
     This is a Dual Controller feature
     :param node_guid: Guid of the owner node
     :type node_guid: str
     :param slot_id: Identifier of the slot
     :type slot_id: str
     :param destination_node_guid: Guid of the destination node
     :type destination_node_guid: str
     :return: None
     :rtype: NoneType
     """
     origin_node = AlbaNode(node_guid)
     destination_node = AlbaNode(destination_node_guid)
     # Validation
     if origin_node.alba_node_cluster is None:
         raise ValueError(
             'Node with guid {0} is not part of a cluster'.format(
                 node_guid))
     if origin_node.alba_node_cluster != destination_node.alba_node_cluster:
         raise ValueError('The nodes are not part of the same cluster')
     if slot_id not in origin_node.stack:
         raise ValueError(
             'Slot with ID {0} is not available in the origin node with guid {1}'
             .format(slot_id, node_guid))
     if slot_id not in destination_node.stack:
         raise ValueError(
             'Slot with ID {0} is not available in the destination node with guid {1}'
             .format(slot_id, destination_node_guid))
     # Stop the OSDs on the origin
     try:
         origin_node.client.stop_slot(slot_id)
     except Exception:
         AlbaNodeClusterController._logger.exception('Unable to stop slot {0}'.format(slot_id))
         raise
     try:
         # Update all references in Alba
         AlbaController.update_osds()
         # The remainder of the slot move has not been implemented yet
         raise NotImplementedError()
     except Exception:
         raise
Example #4
 def get_maintenance_config(self, albabackend):
      # type: (AlbaBackend) -> dict
      """
      Get the maintenance config for the Backend
     :param albabackend: ALBA Backend to set the maintenance config for
     :type albabackend: ovs.dal.hybrids.albabackend.AlbaBackend
     :return: Dict that represents the config
     :rtype: dict
     """
     return AlbaController.get_maintenance_config(
         alba_backend_guid=albabackend.guid)
Example #5
 def checkup_nsm_hosts(albabackend_name, amount):
     """
      Check up the NSM hosts for a certain ALBA Backend
      :param albabackend_name: name of an existing ALBA Backend
      :type albabackend_name: str
      :param amount: minimum amount of NSM hosts for the backend
      :type amount: int
     :return:
     """
     alba_backend_guid = BackendHelper.get_alba_backend_guid_by_name(
         albabackend_name)
     return AlbaController.nsm_checkup(backend_guid=alba_backend_guid,
                                       min_nsms=int(amount))
Example #6
 def get_maintenance_metadata(self):
     # type: () -> dict
     """
     Return a maintenance layout that the GUI can interpret to create a dynamic form
     :return: Dict with metadata
     :rtype: dict
     """
     metadata = {}
     if AlbaController.can_set_auto_cleanup():
         metadata.update({
             'edit': True,
             'edit_metadata': {
                 'auto_cleanup_deleted_namespaces': 'integer'
             }
         })
     return metadata
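
# Hedged sketch of how a client (e.g. a GUI) could interpret the metadata returned by
# get_maintenance_metadata above to build a dynamic form. build_form_fields is an
# illustrative helper, not an existing API.
def build_form_fields(metadata):
    """Turn maintenance metadata into a sorted list of (field_name, field_type) tuples."""
    if not metadata.get('edit', False):
        return []  # nothing is editable, so there is no form to render
    return sorted(metadata.get('edit_metadata', {}).items())

# build_form_fields({'edit': True, 'edit_metadata': {'auto_cleanup_deleted_namespaces': 'integer'}})
# -> [('auto_cleanup_deleted_namespaces', 'integer')]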
Example #7
 def unclaim_disks(alba_backend):
     """
     Un-claim disks
     :param alba_backend: ALBA backend
     :return: None
     """
      # @TODO: Allow the unclaim of disks to go sequentially or in parallel (parallel should be the default)
     alba_backend.invalidate_dynamics(['local_stack'])
     for disks in alba_backend.local_stack.values():
         for disk_id, disk in disks.iteritems():
             if disk['status'] in ['uninitialized']:
                 continue
             asd_node = GeneralAlba.get_node_by_id(disk['node_id'])
             for osd_id in disk['asds'].keys():
                 current_safety = AlbaController.calculate_safety(alba_backend.guid, [osd_id])
                 data = {'asd_id': osd_id,
                         'safety': current_safety}
                 GeneralAlba.logger.info(GeneralAlba.api.execute_post_action('alba/nodes', asd_node.guid, 'reset_asd', data, wait=True))
             data = {'disk': '/dev/disk/by-id/' + disk_id}
             GeneralAlba.logger.info(GeneralAlba.api.execute_post_action('alba/nodes', asd_node.guid, 'remove_disk', data, wait=True))
Example #8
 def set_maintenance_config(self, albabackend, maintenance_config):
      # type: (AlbaBackend, dict) -> celery.result.AsyncResult
     """
     Set the maintenance config for the Backend
     :param albabackend: ALBA Backend to set the maintenance config for
     :type albabackend: ovs.dal.hybrids.albabackend.AlbaBackend
     :param maintenance_config: Maintenance config as it should be set
     Possible keys:
      - auto_cleanup_deleted_namespaces: Number of days to wait before cleaning up a removed namespace.
        Setting it to 0 disables the delay and cleans up a namespace immediately after removal (int)
     :type maintenance_config: dict
     :return: Asynchronous result of a CeleryTask
     :rtype: celery.result.AsyncResult
     """
     # API implementation can be changed in the future. The whole config is sent through the API but only one setting is used
     days = maintenance_config.get('auto_cleanup_deleted_namespaces')
      if not isinstance(days, int) or days < 0:
          raise HttpNotAcceptableException(
              error='invalid_data',
              error_description="'auto_cleanup_deleted_namespaces' should be a positive integer or 0")
     return AlbaController.set_auto_cleanup(
         alba_backend_guid=albabackend.guid, days=days)
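
# Illustrative payloads for set_maintenance_config, based on the docstring above. Only
# 'auto_cleanup_deleted_namespaces' is currently interpreted; the value 30 is an arbitrary
# example, not a recommended default.
example_delayed_cleanup = {'auto_cleanup_deleted_namespaces': 30}  # clean up 30 days after namespace removal
example_immediate_cleanup = {'auto_cleanup_deleted_namespaces': 0}  # clean up immediately after removal
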
    def update_preset(alba_backend_guid, name, policies):
        """
        Updates the policies for an existing preset on the ALBA Backend
        :param alba_backend_guid: Guid of the ALBA backend
        :type alba_backend_guid: str
        :param name: Name of the preset
        :type name: str
        :param policies: New policy list to be sent to alba
        :type policies: list
        :return: None
        """
        # VALIDATIONS
        AlbaPresetController._validate_policies_param(policies=policies)

        alba_backend = AlbaBackend(alba_backend_guid)
        if name not in [preset['name'] for preset in alba_backend.presets]:
            raise RuntimeError('Could not find a preset with name {0} for ALBA Backend {1}'.format(name, alba_backend.name))

        # UPDATE PRESET
        AlbaPresetController._logger.debug('Updating preset {0} with policies {1}'.format(name, policies))
        config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend)))
        temp_config_file = tempfile.mktemp()
        with open(temp_config_file, 'wb') as data_file:
            data_file.write(json.dumps({'policies': policies}))
            data_file.flush()
        AlbaCLI.run(command='update-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name])
        alba_backend.invalidate_dynamics()
        os.remove(temp_config_file)
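
# The snippet above writes the preset policies to a path from tempfile.mktemp(), which is
# race-prone. A hedged sketch of the same write-temp-JSON-then-pass-the-path step using
# NamedTemporaryFile instead; the AlbaCLI call is left as a comment because it needs a live
# ALBA setup, and the policy list shown is an arbitrary example.
import json
import os
import tempfile

def write_policies_to_temp_file(policies):
    """Write {'policies': policies} to a temporary JSON file and return its path."""
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as data_file:
        json.dump({'policies': policies}, data_file)
    return data_file.name

# temp_path = write_policies_to_temp_file([[1, 1, 1, 2]])
# AlbaCLI.run(command='update-preset', config=config, named_params={'input-url': temp_path}, extra_params=[name])
# os.remove(temp_path)
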
    def delete_preset(alba_backend_guid, name):
        """
        Deletes a preset from the Alba backend
        :param alba_backend_guid: Guid of the ALBA backend
        :type alba_backend_guid: str
        :param name: Name of the preset
        :type name: str
        :return: None
        """
        # VALIDATIONS
        alba_backend = AlbaBackend(alba_backend_guid)
        preset_default_map = dict((preset['name'], preset['is_default']) for preset in alba_backend.presets)
        if name not in preset_default_map:
            AlbaPresetController._logger.warning('Preset with name {0} for ALBA Backend {1} could not be found, so not deleting'.format(name, alba_backend.name))
            return

        if preset_default_map[name] is True:
            raise RuntimeError('Cannot delete the default preset')

        # DELETE PRESET
        AlbaPresetController._logger.debug('Deleting preset {0}'.format(name))
        config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend)))
        AlbaCLI.run(command='delete-preset', config=config, extra_params=[name])
        alba_backend.invalidate_dynamics()
    def get_update_information_alba_plugin(information):
        """
        Called when the 'Update' button in the GUI is pressed
        This call collects additional information about the packages which can be updated
        Eg:
            * Downtime for Arakoons
            * Downtime for StorageDrivers
            * Prerequisites that haven't been met
            * Services which will be stopped during update
            * Services which will be restarted after update
        """
        # Verify arakoon info
        arakoon_ovs_info = {'down': False,
                            'name': None,
                            'internal': False}
        arakoon_cacc_info = {'down': False,
                             'name': None,
                             'internal': False}
        for cluster in ['cacc', 'ovsdb']:
            cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
            if cluster_name is None:
                continue

            if cluster == 'cacc':
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
            else:
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)

            if arakoon_metadata['internal'] is True:
                config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
                config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
                if cluster == 'ovsdb':
                    arakoon_ovs_info['down'] = len(config.nodes) < 3
                    arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_ovs_info['internal'] = True
                else:
                    arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_cacc_info['internal'] = True

        # Verify StorageRouter downtime
        fwk_prerequisites = []
        all_storagerouters = StorageRouterList.get_storagerouters()
        for storagerouter in all_storagerouters:
            try:
                SSHClient(endpoint=storagerouter, username='******')
            except UnableToConnectException:
                fwk_prerequisites.append(['node_down', storagerouter.name])

        # Verify ALBA node responsiveness
        alba_prerequisites = []
        for alba_node in AlbaNodeList.get_albanodes():
            try:
                alba_node.client.get_metadata()
            except Exception:
                alba_prerequisites.append(['alba_node_unresponsive', alba_node.ip])

        for key in ['framework', 'alba']:
            if key not in information:
                information[key] = {'packages': {},
                                    'downtime': [],
                                    'prerequisites': fwk_prerequisites if key == 'framework' else alba_prerequisites,
                                    'services_stop_start': set(),
                                    'services_post_update': set()}

            for storagerouter in StorageRouterList.get_storagerouters():
                if key not in storagerouter.package_information:
                    continue

                # Retrieve Arakoon issues
                arakoon_downtime = []
                arakoon_services = []
                for service in storagerouter.services:
                    if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                        continue

                    if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                        cluster_name = AlbaController.get_abm_cluster_name(alba_backend=service.abm_service.alba_backend)
                    else:
                        cluster_name = AlbaController.get_nsm_cluster_name(alba_backend=service.nsm_service.alba_backend, number=service.nsm_service.number)
                    if Configuration.exists('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) is False:
                        continue
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                    if arakoon_metadata['internal'] is True:
                        arakoon_services.append('ovs-{0}'.format(service.name))
                        config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                        config.load_config()
                        if len(config.nodes) < 3:
                            if service.type.name == ServiceType.SERVICE_TYPES.NS_MGR:
                                arakoon_downtime.append(['backend', service.nsm_service.alba_backend.name])
                            else:
                                arakoon_downtime.append(['backend', service.abm_service.alba_backend.name])

                for package_name, package_info in storagerouter.package_information[key].iteritems():
                    if package_name not in AlbaUpdateController.alba_plugin_packages:
                        continue  # Only gather information for the core packages

                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)

                    if package_name == 'openvstorage-backend':
                        if ['gui', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['gui', None])
                        if ['api', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['api', None])
                        information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                    elif package_name == 'alba':
                        for down in arakoon_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(arakoon_services)
                    elif package_name == 'arakoon':
                        if key == 'framework':
                            framework_arakoons = set()
                            if arakoon_ovs_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                            if arakoon_cacc_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))

                            information[key]['services_post_update'].update(framework_arakoons)
                            if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                                information[key]['downtime'].append(['ovsdb', None])
                        else:
                            for down in arakoon_downtime:
                                if down not in information[key]['downtime']:
                                    information[key]['downtime'].append(down)
                            information[key]['services_post_update'].update(arakoon_services)

            for alba_node in AlbaNodeList.get_albanodes():
                for package_name, package_info in alba_node.package_information.get(key, {}).iteritems():
                    if package_name not in AlbaUpdateController.sdm_packages:
                        continue  # Only gather information for the SDM packages

                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)
        return information
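
# A hedged illustration of the shape of the 'information' dict assembled above. The backend,
# StorageRouter, IP and service names are made up; the per-package info is passed through from
# storagerouter.package_information unchanged, so it is shown here as an empty dict.
example_information = {
    'framework': {'packages': {'openvstorage-backend': {}},
                  'downtime': [['gui', None], ['api', None]],
                  'prerequisites': [['node_down', 'storagerouter_2']],
                  'services_stop_start': {'watcher-framework', 'memcached'},
                  'services_post_update': set()},
    'alba': {'packages': {'alba': {}},
             'downtime': [['backend', 'backend_1']],
             'prerequisites': [['alba_node_unresponsive', '10.100.1.2']],
             'services_stop_start': set(),
             'services_post_update': {'ovs-arakoon-backend_1-abm'}}
}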
Example #12
    def test_maintenance_agents_for_local_backends_w_layout(self):
        """
        Validates the checkup maintenance agents for LOCAL ALBA Backends with a specific layout specified
        Additionally test whether at least 1 maintenance agent gets deployed even though none of the ALBA Nodes is linked to the ALBA Backend
        """
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={
                'alba_nodes': [1, 2, 3, 4],
                'alba_backends': [[1, 'LOCAL']],
                'alba_abm_clusters': [1]
            })
        alba_backend = alba_structure['alba_backends'][1]
        config_key = AlbaController.AGENTS_LAYOUT_CONFIG_KEY.format(
            alba_backend.guid)
        unknown_node_name = 'non-existing-node'

        # Mock some return values for some of the calls performed by `checkup_maintenance_agents`
        for alba_node in alba_structure['alba_nodes'].itervalues():
            ManagerClientMockup.test_results[alba_node].update({
                'get_stack': {},
                'get_service_status': {
                    'status': [None, 'active']
                },
                'add_maintenance_service':
                '',
                'remove_maintenance_service':
                ''
            })

        ###############################
        # Verify incorrect layout value
        log_entry = 'Layout is not a list and will be ignored'
        Configuration.set(key=config_key,
                          value=unknown_node_name)  # Value should be a list

        # Checkup maintenance agents will not find any suitable ALBA Nodes to deploy a maintenance agent on, because no ALBA Nodes are linked to the ALBA Backend yet,
        # therefore it'll deploy a maintenance agent on a random ALBA Node
        AlbaController.checkup_maintenance_agents()
        self.assertIn(member=log_entry, container=Logger._logs['lib'].keys())
        self.assertEqual(first='WARNING',
                         second=Logger._logs['lib'][log_entry])

        # Example of ManagerClientMockup.maintenance_agents
        # {<AlbaNode (guid: c015cf06-8bd0-46c5-811d-41ac6f521a63, at: 0x7f77cb0af390)>: {'alba-maintenance_backend_1-J6PMBcEk1Ej42udp': ['node_1']}}
        self.assertEqual(
            first=1, second=len(ManagerClientMockup.maintenance_agents.keys(
            )))  # Only 1 ALBA Node should have a maintenance agent running
        self.assertEqual(
            first=1,
            second=len(ManagerClientMockup.maintenance_agents.values())
        )  # Only 1 maintenance agent should have been deployed on that 1 ALBA Node
        alba_node_w_agent_1 = ManagerClientMockup.maintenance_agents.keys()[0]
        self.assertEqual(
            first=[alba_node_w_agent_1.node_id],
            second=ManagerClientMockup.maintenance_agents[alba_node_w_agent_1].
            values()[0]
        )  # Read preference must be the Node ID of the Node on which the maintenance was deployed

        # 3 out of 4 ALBA Nodes do not have a maintenance agent yet
        alba_nodes_wo_agent = [
            an for an in alba_structure['alba_nodes'].itervalues()
            if an != alba_node_w_agent_1
        ]
        self.assertEqual(first=3, second=len(alba_nodes_wo_agent))

        # Link an ALBA Node without agent to the ALBA Backend, forcing the previously deployed service to be removed and a new one to be created on this ALBA Node
        alba_node = alba_nodes_wo_agent[0]
        VirtualAlbaBackend.data['127.0.0.1:35001'] = alba_backend.guid
        ManagerClientMockup.test_results[alba_node]['get_stack'] = {
            alba_node.node_id: {
                'osds': {
                    'osd_id_1': {
                        'ips': ['127.0.0.1'],
                        'port': 35001
                    }
                }
            }
        }
        alba_node.invalidate_dynamics('stack')
        AlbaController.checkup_maintenance_agents()
        self.assertEqual(
            first=1, second=len(ManagerClientMockup.maintenance_agents.keys(
            )))  # Only 1 ALBA Node should have a maintenance agent running
        self.assertEqual(
            first=1,
            second=len(ManagerClientMockup.maintenance_agents.values())
        )  # Only 1 maintenance agent should have been deployed on that 1 ALBA Node
        self.assertNotEqual(
            first=alba_node_w_agent_1, second=alba_node
        )  # The maintenance agent should have moved to the node linked to the ALBA Backend
        self.assertEqual(
            first=[alba_node.node_id],
            second=ManagerClientMockup.maintenance_agents[alba_node].values()
            [0]
        )  # Read preference must be the Node ID of the Node on which the maintenance was moved to

        # Re-scheduling a checkup should not change anything at this point
        AlbaController.checkup_maintenance_agents()
        self.assertEqual(
            first=1, second=len(ManagerClientMockup.maintenance_agents.keys(
            )))  # Only 1 ALBA Node should have a maintenance agent running
        self.assertEqual(
            first=1,
            second=len(ManagerClientMockup.maintenance_agents.values())
        )  # Only 1 maintenance agent should have been deployed on that 1 ALBA Node
        alba_node_w_agent_2 = ManagerClientMockup.maintenance_agents.keys()[0]
        self.assertEqual(first=alba_node, second=alba_node_w_agent_2
                         )  # The maintenance agent should not have moved

        # Set 2 out of 4 ALBA Nodes in the layout key
        alba_nodes_wo_agent = [
            an for an in alba_structure['alba_nodes'].itervalues()
            if an != alba_node_w_agent_2
        ]
        self.assertEqual(first=3, second=len(alba_nodes_wo_agent))
        node_1 = alba_nodes_wo_agent[0]
        node_2 = alba_nodes_wo_agent[1]
        Configuration.set(key=config_key,
                          value=[node_1.node_id, node_2.node_id])
        AlbaController.checkup_maintenance_agents()
        self.assertIn(member=node_1,
                      container=ManagerClientMockup.maintenance_agents
                      )  # Specified in the layout
        self.assertIn(member=node_2,
                      container=ManagerClientMockup.maintenance_agents
                      )  # Specified in the layout
        self.assertEqual(
            first=2, second=len(ManagerClientMockup.maintenance_agents)
        )  # Only the 2 specified ALBA Nodes should be running a maintenance agent
        self.assertEqual(first=1,
                         second=len(
                             ManagerClientMockup.maintenance_agents[node_1])
                         )  # 1 Maintenance agent for this ALBA Node
        self.assertEqual(first=1,
                         second=len(
                             ManagerClientMockup.maintenance_agents[node_2])
                         )  # 1 Maintenance agent for this ALBA Node
        self.assertEqual(
            first=[node_1.node_id],
            second=ManagerClientMockup.maintenance_agents[node_1].values()
            [0])  # Validate the read preference
        self.assertEqual(
            first=[node_2.node_id],
            second=ManagerClientMockup.maintenance_agents[node_2].values()
            [0])  # Validate the read preference

        #########################################
        # Verify all ALBA Nodes unknown in layout
        Logger._logs['lib'] = {}
        log_entry = 'Layout does not contain any known/reachable nodes and will be ignored'
        Configuration.set(key=config_key,
                          value=[unknown_node_name
                                 ])  # Only unknown Nodes in layout
        AlbaController.checkup_maintenance_agents()
        self.assertIn(member=log_entry, container=Logger._logs['lib'].keys())
        self.assertEqual(first='WARNING',
                         second=Logger._logs['lib'][log_entry])
        self.assertIn(
            member=alba_node, container=ManagerClientMockup.maintenance_agents
        )  # The ALBA Node linked to the ALBA Backend should again have the maintenance agent
        self.assertEqual(
            first=1,
            second=len(ManagerClientMockup.maintenance_agents[alba_node]
                       ))  # Only 1 maintenance agent should have been deployed
        self.assertEqual(
            first=[alba_node.node_id],
            second=ManagerClientMockup.maintenance_agents[alba_node].values()
            [0]
        )  # Read preference must be the Node ID of the Node on which the maintenance was deployed

        # 3 out of 4 ALBA Nodes do not have a maintenance agent yet
        alba_nodes_wo_agent = [
            an for an in alba_structure['alba_nodes'].itervalues()
            if an != alba_node_w_agent_1
        ]
        self.assertEqual(first=3, second=len(alba_nodes_wo_agent))

        #############################################
        # Verify at least 1 known ALBA Node in layout
        Logger._logs['lib'] = {}
        node_3 = alba_structure['alba_nodes'][3]
        log_entry = 'Layout contains unknown/unreachable node {0}'.format(
            unknown_node_name)
        Configuration.set(key=config_key,
                          value=[unknown_node_name, node_3.node_id
                                 ])  # 1 known ALBA Node in layout
        AlbaController.checkup_maintenance_agents()
        self.assertIn(member=log_entry, container=Logger._logs['lib'].keys())
        self.assertEqual(first='WARNING',
                         second=Logger._logs['lib'][log_entry])
        self.assertIn(
            member=node_3, container=ManagerClientMockup.maintenance_agents
        )  # The ALBA Node specified in the layout should have the maintenance agent
        self.assertEqual(
            first=1,
            second=len(ManagerClientMockup.maintenance_agents[node_3]
                       ))  # Only 1 maintenance agent should have been deployed
        self.assertEqual(
            first=[node_3.node_id],
            second=ManagerClientMockup.maintenance_agents[node_3].values()[0]
        )  # Read preference must be the Node ID of the Node on which the maintenance was deployed

        # 3 out of 4 ALBA Nodes do not have a maintenance agent yet
        alba_nodes_wo_agent = [
            an for an in alba_structure['alba_nodes'].itervalues()
            if an != node_3
        ]
        self.assertEqual(first=3, second=len(alba_nodes_wo_agent))
Example #13
    def test_maintenance_agents_for_local_backends_wo_layout(self):
        """
        Validates the checkup maintenance agents for LOCAL ALBA Backends without a specific layout specified
        Additionally test:
            * Checkup maintenance agents for a specific ALBA Backend
            * Downscale the required services
            * Upscale the required services
        """
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={
                'alba_nodes': [1, 2, 3, 4],
                'alba_backends': [[1, 'LOCAL']],
                'alba_abm_clusters': [1]
            })

        # Simulate every ALBA Node has 1 OSD for `alba_backend_1`
        local_ip = '127.0.0.1'
        alba_backend_1 = alba_structure['alba_backends'][1]
        for index, alba_node in enumerate(
                alba_structure['alba_nodes'].itervalues()):
            port = 35000 + index
            ManagerClientMockup.test_results[alba_node].update({
                'get_service_status': {
                    'status': [None, 'active']
                },
                'add_maintenance_service':
                '',
                'remove_maintenance_service':
                '',
                'get_stack': {
                    alba_node.node_id: {
                        'osds': {
                            'osd_id_{0}'.format(index): {
                                'ips': [local_ip],
                                'port': port
                            }
                        }
                    }
                }
            })
            VirtualAlbaBackend.data['{0}:{1}'.format(
                local_ip, port)] = alba_backend_1.guid

        # Since all ALBA Nodes (4) are suitable for a maintenance agent, we only deploy a default amount of 3
        AlbaController.checkup_maintenance_agents()
        # Example of ManagerClientMockup.maintenance_agents
        # {
        #     <AlbaNode (guid: d43df79f-9c47-4059-bd84-0f3ef81733c2, at: 0x7f80028e4750)>: {'alba-maintenance_backend_1-RWvL8aCzwIBk6FaZ': ['node_3']},
        #     <AlbaNode (guid: 5dbf972d-2619-48d1-adcd-86ec5b6342f7, at: 0x7f80027ee2d0)>: {'alba-maintenance_backend_1-OgvznajMoRagKCVb': ['node_2']},
        #     <AlbaNode (guid: 79a762f7-3019-4b86-80d6-a5560c52b208, at: 0x7f80027ee0d0)>: {'alba-maintenance_backend_1-ZV9v2vtRfvaYBhhw': ['node_4']}
        # }
        self.assertEqual(
            first=3, second=len(ManagerClientMockup.maintenance_agents)
        )  # 3 out of 4 ALBA Nodes should have a maintenance agent
        for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems(
        ):
            self.assertEqual(first=1, second=len(maintenance_info))
            self.assertEqual(first=[alba_node.node_id],
                             second=ManagerClientMockup.
                             maintenance_agents[alba_node].values()[0])

        # Downscale the required amount of services from 3 to 2
        config_key = AlbaController.NR_OF_AGENTS_CONFIG_KEY.format(
            alba_backend_1.guid)
        Configuration.set(key=config_key, value=2)
        nodes_w_agents = ManagerClientMockup.maintenance_agents.keys()
        AlbaController.checkup_maintenance_agents()
        self.assertEqual(
            first=2, second=len(ManagerClientMockup.maintenance_agents)
        )  # 2 out of 4 ALBA Nodes should have a maintenance agent now
        for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems(
        ):
            self.assertEqual(first=1, second=len(maintenance_info))
            self.assertEqual(first=[alba_node.node_id],
                             second=ManagerClientMockup.
                             maintenance_agents[alba_node].values()[0])
        for alba_node in ManagerClientMockup.maintenance_agents:
            self.assertIn(
                member=alba_node, container=nodes_w_agents
            )  # 1 removed, rest should still be part of previously used ALBA Nodes

        # Upscale the required amount of services from 2 to 3 again
        Configuration.set(key=config_key, value=3)
        AlbaController.checkup_maintenance_agents()
        self.assertEqual(
            first=3, second=len(ManagerClientMockup.maintenance_agents)
        )  # 3 out of 4 ALBA Nodes should again have a maintenance agent
        for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems(
        ):
            self.assertEqual(first=1, second=len(maintenance_info))
            self.assertEqual(first=[alba_node.node_id],
                             second=ManagerClientMockup.
                             maintenance_agents[alba_node].values()[0])

        # Create an additional ALBA Backend and verify that it is not processed when asking to checkup the previously created ALBA Backend
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={
                'alba_backends': [[2, 'LOCAL']],
                'alba_abm_clusters': [2]
            },
            previous_structure=alba_structure)
        alba_backend_2 = alba_structure['alba_backends'][2]
        AlbaController.checkup_maintenance_agents(
            alba_backend_guid=alba_backend_1.guid
        )  # Run checkup for previously created ALBA Backend, nothing should change
        self.assertEqual(
            first=3, second=len(ManagerClientMockup.maintenance_agents)
        )  # 3 out of 4 ALBA Nodes should again have a maintenance agent
        for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems(
        ):
            self.assertEqual(first=1, second=len(maintenance_info))
            self.assertEqual(first=[alba_node.node_id],
                             second=ManagerClientMockup.
                             maintenance_agents[alba_node].values()[0])

        # Execute a general checkup maintenance agents and verify newly created ALBA Backend has 1 service (because not linked to any OSDs)
        AlbaController.checkup_maintenance_agents()
        services = []
        for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems(
        ):
            alba_node.invalidate_dynamics('maintenance_services')
            if alba_backend_2.name in alba_node.maintenance_services:
                services.append(maintenance_info)
                self.assertEqual(first=1, second=len(maintenance_info))
                self.assertEqual(first=[alba_node.node_id],
                                 second=maintenance_info.values()[0])
        self.assertEqual(
            first=1, second=len(services)
        )  # Only 1 service should have been deployed for the 2nd ALBA Backend
Example #14
    def remove_osd(node_guid, osd_id, expected_safety):
        """
        Removes an OSD
        :param node_guid: Guid of the node to remove an OSD from
        :type node_guid: str
        :param osd_id: ID of the OSD to remove
        :type osd_id: str
        :param expected_safety: Expected safety after having removed the OSD
        :type expected_safety: dict or None
        :return: Aliases of the disk on which the OSD was removed
        :rtype: list
        """
        # Retrieve corresponding OSD in model
        node = AlbaNode(node_guid)
        AlbaNodeController._logger.debug('Removing OSD {0} at node {1}'.format(
            osd_id, node.ip))
        osd = AlbaOSDList.get_by_osd_id(osd_id)
        alba_backend = osd.alba_backend

        if expected_safety is None:
            AlbaNodeController._logger.warning(
                'Skipping safety check for OSD {0} on backend {1} - this is dangerous'
                .format(osd_id, alba_backend.guid))
        else:
            final_safety = AlbaController.calculate_safety(
                alba_backend_guid=alba_backend.guid, removal_osd_ids=[osd_id])
            safety_lost = final_safety['lost']
            safety_crit = final_safety['critical']
            if (safety_crit != 0 or safety_lost != 0) and (
                    safety_crit != expected_safety['critical']
                    or safety_lost != expected_safety['lost']):
                raise RuntimeError(
                    'Cannot remove OSD {0} as the current safety is not as expected ({1} vs {2})'
                    .format(osd_id, final_safety, expected_safety))
            AlbaNodeController._logger.debug(
                'Safety OK for OSD {0} on backend {1}'.format(
                    osd_id, alba_backend.guid))
        AlbaNodeController._logger.debug(
            'Purging OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
        AlbaController.remove_units(alba_backend_guid=alba_backend.guid,
                                    osd_ids=[osd_id])

        # Delete the OSD
        result = node.client.delete_osd(slot_id=osd.slot_id, osd_id=osd_id)
        if result['_success'] is False:
            raise RuntimeError('Error removing OSD: {0}'.format(
                result['_error']))

        # Clean configuration management and model - Well, just try it at least
        if Configuration.exists(ASD_CONFIG.format(osd_id), raw=True):
            Configuration.delete(ASD_CONFIG_DIR.format(osd_id), raw=True)

        osd.delete()
        node.invalidate_dynamics()
        if alba_backend is not None:
            alba_backend.invalidate_dynamics()
            alba_backend.backend.invalidate_dynamics()
        if node.storagerouter is not None:
            try:
                DiskController.sync_with_reality(
                    storagerouter_guid=node.storagerouter_guid)
            except UnableToConnectException:
                AlbaNodeController._logger.warning(
                    'Skipping disk sync since StorageRouter {0} is offline'.
                    format(node.storagerouter.name))

        return [osd.slot_id]
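
# Self-contained sketch of the safety check performed in remove_osd above: removal is allowed
# when nothing would be lost or critical, or when the computed safety matches what the caller
# expected. removal_safety_ok is an illustrative name, not an existing API.
def removal_safety_ok(final_safety, expected_safety):
    """Mirror the guard in remove_osd: refuse when safety degrades unexpectedly."""
    lost = final_safety['lost']
    crit = final_safety['critical']
    if crit == 0 and lost == 0:
        return True
    return crit == expected_safety['critical'] and lost == expected_safety['lost']

# removal_safety_ok({'lost': 0, 'critical': 0}, {'lost': 0, 'critical': 0})   -> True
# removal_safety_ok({'lost': 1, 'critical': 0}, {'lost': 0, 'critical': 0})   -> False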
Example #15
    def test_alba_arakoon_checkup(self):
        """
        Validates whether the ALBA Arakoon checkup works (Manual and Scheduled)
        """
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})

        #############################
        # SCHEDULED_ARAKOON_CHECKUP #
        #############################
        # Create an ABM and NSM cluster for ALBA Backend 1 and do some basic validations
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        abm_cluster_name = '{0}-abm'.format(ab_1.name)
        nsm_cluster_name = '{0}-nsm_0'.format(ab_1.name)
        arakoon_clusters = sorted(Configuration.list('/ovs/arakoon'))
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)

        abm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=abm_cluster_name)
        nsm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=nsm_cluster_name)
        self.assertTrue(expr=abm_metadata['in_use'])
        self.assertTrue(expr=nsm_metadata['in_use'])

        # Run scheduled Arakoon checkup and validate amount of Arakoon clusters did not change
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Create 2 additional StorageRouters
        srs = DalHelper.build_dal_structure(
            structure={'storagerouters': [2, 3]},
            previous_structure=ovs_structure)['storagerouters']
        sr_2 = srs[2]
        sr_3 = srs[3]

        # Run scheduled checkup again and do some validations
        MockedSSHClient._run_returns[sr_2.ip] = {}
        MockedSSHClient._run_returns[sr_3.ip] = {}
        MockedSSHClient._run_returns[sr_2.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_3/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_2.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_2.machine_id)] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_3.machine_id)] = None
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services),
                         second=3)  # Gone up from 1 to 3
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)  # Still 1 since NSM checkup hasn't run yet

        # Make sure 1 StorageRouter is unreachable
        SSHClient._raise_exceptions[sr_3.ip] = {
            'users': ['ovs'],
            'exception': UnableToConnectException('No route to host')
        }
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        alba_logs = Logger._logs.get('lib', [])
        self.assertIn(
            member='Storage Router with IP {0} is not reachable'.format(
                sr_3.ip),
            container=alba_logs)

        ##########################
        # MANUAL_ARAKOON_CHECKUP #
        ##########################
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        # Run manual Arakoon checkup and validate amount of Arakoon clusters did not change
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid, nsm_clusters=[], abm_cluster=None)
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Test some error paths
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['no_abm_cluster_passed'])
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[],
                abm_cluster='no_nsm_clusters_passed')
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[nsm_cluster_name],
                abm_cluster=abm_cluster_name)
        self.assertEqual(first=raise_info.exception.message,
                         second='Cluster {0} has already been claimed'.format(
                             abm_cluster_name))
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['non-existing-nsm-cluster'],
                abm_cluster='non-existing-abm-cluster')
        self.assertEqual(
            first=raise_info.exception.message,
            second=
            'Could not find an Arakoon cluster with name: non-existing-abm-cluster'
        )

        # Recreate ALBA Backend with Arakoon clusters
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]

        # Create some Arakoon clusters to be claimed by the manual checkup
        for cluster_name, cluster_type in {
                'manual-abm-1': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-abm-2': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-nsm-1': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-2': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-3': ServiceType.ARAKOON_CLUSTER_TYPES.NSM
        }.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(
                cluster_type=cluster_type,
                ip=sr_1.ip,
                base_dir=DalHelper.CLUSTER_DIR.format(cluster_name),
                internal=False)
            arakoon_installer.start_cluster()
            arakoon_installer.unclaim_cluster()
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid,
            nsm_clusters=['manual-nsm-1', 'manual-nsm-3'],
            abm_cluster='manual-abm-2')

        # Validate the correct clusters have been claimed by the manual checkup
        unused_abms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.ABM)
        unused_nsms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM)
        self.assertEqual(first=len(unused_abms), second=1)
        self.assertEqual(first=len(unused_nsms), second=1)
        self.assertEqual(first=unused_abms[0]['cluster_name'],
                         second='manual-abm-1')
        self.assertEqual(first=unused_nsms[0]['cluster_name'],
                         second='manual-nsm-2')
Example #16
    def test_nsm_checkup_internal(self):
        """
        Validates whether the NSM checkup works for internally managed Arakoon clusters
        """
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 1)
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.maxload', 10)

        structure = DalHelper.build_dal_structure(structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})

        alba_backend = alba_structure['alba_backends'][1]
        storagerouter_1 = structure['storagerouters'][1]

        MockedSSHClient._run_returns[storagerouter_1.ip] = {}
        MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None

        VirtualAlbaBackend.run_log = {}
        AlbaController.add_cluster(alba_backend.guid)

        # Validation of nsm_checkup
        with self.assertRaises(ValueError):
            AlbaArakoonController.nsm_checkup(min_internal_nsms=0)  # Min_nsms should be at least 1

        # Validate single node NSM cluster
        self._validate_nsm([['1']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config'],
                                                                           ['add_nsm_host', 'backend_1-nsm_0'],
                                                                           ['update_maintenance_config', '--eviction-type-random'],
                                                                           ['update_maintenance_config', 'enable-auto-cleanup-deleted-namespaces-days']])

        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()

        # Running the NSM checkup should not change anything after an add_cluster
        self._validate_nsm([['1']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

        structure = DalHelper.build_dal_structure(structure={'storagerouters': [2]}, previous_structure=structure)
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()

        # There should still be one NSM, since the safety is still at 1
        self._validate_nsm([['1']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

        Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 2)
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()

        # There should still be one NSM, since the ABM isn't extended yet
        self._validate_nsm([['1']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

        storagerouter_2 = structure['storagerouters'][2]
        MockedSSHClient._run_returns[storagerouter_2.ip] = {}
        MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend.guid, nsm_clusters=[])

        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config']])

        MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_0/config -catchup-only'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()

        # Now that the ABM was extended, the NSM should also be extended
        self._validate_nsm([['1', '2']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_nsm_host', 'backend_1-nsm_0']])

        MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
        MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
        MockedSSHClient._run_returns[storagerouter_1.ip]['arakoon --node 1 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_1/config -catchup-only'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_1/config -catchup-only'] = None
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup(min_internal_nsms=2)

        # A second NSM cluster (running on two nodes) should be added
        self._validate_nsm([['1', '2'],
                            ['1', '2']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['add_nsm_host', 'backend_1-nsm_1']])

        VirtualAlbaBackend.data['backend_1-abm']['nsms'][0]['namespaces_count'] = 25

        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()

        # Nothing should have happened, since there's still a non-overloaded NSM
        self._validate_nsm([['1', '2'],
                            ['1', '2']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

        VirtualAlbaBackend.data['backend_1-abm']['nsms'][1]['namespaces_count'] = 25

        MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
        MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
        MockedSSHClient._run_returns[storagerouter_1.ip]['arakoon --node 1 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_2/config -catchup-only'] = None
        MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_2/config -catchup-only'] = None
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()

        # A third NSM cluster (running on two nodes) should be added
        self._validate_nsm([['1', '2'],
                            ['1', '2'],
                            ['1', '2']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['add_nsm_host', 'backend_1-nsm_2']])

        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()

        # Running the checkup should not change anything
        self._validate_nsm([['1', '2'],
                            ['1', '2'],
                            ['1', '2']])
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])
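
The checkup rounds above prime the same six mocked shell commands for every new internal NSM cluster: one plugin-symlink command per StorageRouter per database directory, plus one arakoon catch-up command per StorageRouter. A helper along the following lines could factor out that repetition; this is only a sketch against the mocks used in this test, and prime_nsm_commands with its arguments is hypothetical, not part of the test suite.

def prime_nsm_commands(run_returns, sr_ips, nsm_number):
    """
    Register the mocked command results needed before cluster 'backend_1-nsm_<nsm_number>'
    can be deployed on the given StorageRouters.
    :param run_returns: dict mapping ip -> {command: result}, e.g. MockedSSHClient._run_returns
    :param sr_ips: ordered list of StorageRouter IPs, e.g. [storagerouter_1.ip, storagerouter_2.ip]
    :param nsm_number: sequence number of the NSM cluster to prepare
    """
    cluster = 'backend_1-nsm_{0}'.format(nsm_number)
    for node_number, ip in enumerate(sr_ips, 1):
        commands = run_returns.setdefault(ip, {})
        # Every StorageRouter gets the plugin symlink command for every NSM database directory
        for sr_number in range(1, len(sr_ips) + 1):
            commands['ln -s /usr/lib/alba/nsm_host_plugin.cmxs '
                     '/tmp/unittest/sr_{0}/disk_1/partition_1/arakoon/{1}/db'.format(sr_number, cluster)] = None
        # ... and its own arakoon catch-up command
        commands['arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json'
                 '?key=/ovs/arakoon/{1}/config -catchup-only'.format(node_number, cluster)] = None

# Hypothetical usage replacing the repeated literal assignments above:
# prime_nsm_commands(MockedSSHClient._run_returns, [storagerouter_1.ip, storagerouter_2.ip], 2)
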
Example #17
 def check_nsm_load(cls, result_handler, max_load=None, use_total_capacity=False, total_capacity_warning=None, total_capacity_error=None):
     """
     Checks all NSM services registered within the Framework and reports their load
     :param result_handler: logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :param max_load: Maximum load percentage before marking it as overloaded. Defaults to ovs/framework/plugins/alba/config|nsm.maxload
     :type max_load: float
     :param use_total_capacity: Base the NSM load on the total possible capacity (the capacity all NSMs can hold before they are marked as overloaded)
     instead of checking the least filled NSM. Use the threshold arguments for tuning
     :type use_total_capacity: bool
     :param total_capacity_warning: Number of remaining namespaces threshold before throwing a warning. Defaults to 20% of the total namespaces
     :type total_capacity_warning: int
     :param total_capacity_error: Number of remaining namespaces threshold before throwing an error. Defaults to 5% of the total namespaces
     :type total_capacity_error: int
     :return: None
     :rtype: NoneType
     """
     max_nsm_load_config = Configuration.get('ovs/framework/plugins/alba/config|nsm.maxload')
     max_load = max_load or max_nsm_load_config
     for alba_backend in AlbaBackendList.get_albabackends():
         if alba_backend.abm_cluster is None:
             result_handler.failure('No ABM cluster found for ALBA Backend {0}'.format(alba_backend.name))
             continue
         if len(alba_backend.abm_cluster.abm_services) == 0:
             result_handler.failure('ALBA Backend {0} does not have any registered ABM services'.format(alba_backend.name))
             continue
         if len(alba_backend.nsm_clusters) == 0:
             result_handler.failure('ALBA Backend {0} does not have any registered NSM services'.format(alba_backend.name))
             continue
         internal = alba_backend.abm_cluster.abm_services[0].service.is_internal
         if use_total_capacity:
             maximum_capacity_before_overload = AlbaHealthCheck._get_nsm_max_capacity_before_overload(alba_backend, max_nsm_load_config)
             total_capacity_warning = total_capacity_warning or math.ceil(maximum_capacity_before_overload * 1.0/5)
             total_capacity_error = total_capacity_error or math.ceil(maximum_capacity_before_overload * 1.0/20)
             config = Configuration.get_configuration_path(key=alba_backend.abm_cluster.config_location)
             hosts_data = AlbaCLI.run(command='list-nsm-hosts', config=config)
             current_capacity = sum([host['namespaces_count'] for host in hosts_data if not host['lost']])
             remaining_capacity = maximum_capacity_before_overload - current_capacity
             if remaining_capacity > total_capacity_warning and remaining_capacity > total_capacity_error:  # Check both thresholds: only the error threshold might have been specified and could then exceed the warning default
                 result_handler.success('NSMs for backend {0} have enough capacity remaining ({1}/{2} used)'.format(alba_backend.name, current_capacity, maximum_capacity_before_overload),
                                        code=ErrorCodes.nsm_load_ok)
             elif total_capacity_warning >= remaining_capacity > total_capacity_error:
                 result_handler.warning('NSMs for backend {0} have reached the warning threshold '
                                        '({1} namespaces should remain available, {2}/{3} used)'.format(alba_backend.name, total_capacity_warning, current_capacity, maximum_capacity_before_overload),
                                        code=ErrorCodes.nsm_load_warn)
             else:
                 result_handler.failure('NSMs for backend {0} have reached the error threshold '
                                        '({1} namespaces should remain available, {2}/{3} used)'.format(alba_backend.name, total_capacity_error, current_capacity, maximum_capacity_before_overload),
                                        code=ErrorCodes.nsm_load_failure)
         else:
             nsm_loads = {}
             sorted_nsm_clusters = sorted(alba_backend.nsm_clusters, key=lambda k: k.number)
             for nsm_cluster in sorted_nsm_clusters:
                 nsm_loads[nsm_cluster.number] = AlbaController.get_load(nsm_cluster)
             overloaded = min(nsm_loads.values()) >= max_load
             if overloaded is False:
                 result_handler.success('NSMs for backend {0} are not overloaded'.format(alba_backend.name),
                                        code=ErrorCodes.nsm_load_ok)
             else:
                 if internal is True:
                     result_handler.warning('NSMs for backend {0} are overloaded. The NSM checkup will take care of this'.format(alba_backend.name),
                                            code=ErrorCodes.nsm_load_warn)
                 else:
                     result_handler.failure('NSMs for backend {0} are overloaded. Please add your own NSM clusters to the backend'.format(alba_backend.name),
                                            code=ErrorCodes.nsm_load_failure)
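
In numbers, the use_total_capacity branch above degrades from success to warning to failure as the remaining namespace capacity drops below the warning and error thresholds, which default to 20% and 5% of the total. A minimal standalone illustration, assuming a hypothetical backend whose NSMs can hold 1000 namespaces before being considered overloaded:

import math

maximum_capacity_before_overload = 1000  # assumed example value
total_capacity_warning = math.ceil(maximum_capacity_before_overload * 1.0 / 5)   # 200 -> warn below 200 remaining
total_capacity_error = math.ceil(maximum_capacity_before_overload * 1.0 / 20)    # 50  -> fail below 50 remaining

for current_capacity in [700, 850, 980]:
    remaining_capacity = maximum_capacity_before_overload - current_capacity
    if remaining_capacity > total_capacity_warning and remaining_capacity > total_capacity_error:
        level = 'success'
    elif total_capacity_warning >= remaining_capacity > total_capacity_error:
        level = 'warning'
    else:
        level = 'failure'
    print('{0}/{1} used, {2} remaining -> {3}'.format(current_capacity, maximum_capacity_before_overload, remaining_capacity, level))
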
Example #18
 def checkup_maintenance_agents():
     """
     Perform a maintenance agent checkup
     :return: None
     """
     AlbaController.checkup_maintenance_agents()
Example #19
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        """
        AlbaMigrationController._logger.info(
            'Preparing out of band migrations...')

        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.albabackendlist import AlbaBackendList
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.albaosdlist import AlbaOSDList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException
        from ovs.extensions.migration.migration.albamigrator import ExtensionMigrator
        from ovs.extensions.packages.albapackagefactory import PackageFactory
        from ovs.extensions.services.albaservicefactory import ServiceFactory
        from ovs.extensions.plugins.albacli import AlbaCLI, AlbaError
        from ovs.lib.alba import AlbaController
        from ovs.lib.disk import DiskController

        AlbaMigrationController._logger.info('Start out of band migrations...')

        #############################################
        # Introduction of IP:port combination on OSDs
        osd_info_map = {}
        alba_backends = AlbaBackendList.get_albabackends()
        for alba_backend in alba_backends:
            AlbaMigrationController._logger.info(
                'Verifying ALBA Backend {0}'.format(alba_backend.name))
            if alba_backend.abm_cluster is None:
                AlbaMigrationController._logger.warning(
                    'ALBA Backend {0} does not have an ABM cluster registered'.
                    format(alba_backend.name))
                continue

            AlbaMigrationController._logger.debug(
                'Retrieving configuration path for ALBA Backend {0}'.format(
                    alba_backend.name))
            try:
                config = Configuration.get_configuration_path(
                    alba_backend.abm_cluster.config_location)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Failed to retrieve the configuration path for ALBA Backend {0}'
                    .format(alba_backend.name))
                continue

            AlbaMigrationController._logger.info(
                'Retrieving OSD information for ALBA Backend {0}'.format(
                    alba_backend.name))
            try:
                osd_infos = AlbaCLI.run(command='list-all-osds', config=config)
            except (AlbaError, RuntimeError):
                AlbaMigrationController._logger.exception(
                    'Failed to retrieve OSD information for ALBA Backend {0}'.
                    format(alba_backend.name))
                continue

            for osd_info in osd_infos:
                if osd_info.get('long_id'):
                    osd_info_map[osd_info['long_id']] = {
                        'ips': osd_info.get('ips', []),
                        'port': osd_info.get('port')
                    }

        for osd in AlbaOSDList.get_albaosds():
            if osd.osd_id not in osd_info_map:
                AlbaMigrationController._logger.warning(
                    'OSD with ID {0} is modelled but could not be found through ALBA'
                    .format(osd.osd_id))
                continue

            ips = osd_info_map[osd.osd_id]['ips']
            port = osd_info_map[osd.osd_id]['port']
            changes = False
            if osd.ips is None:
                changes = True
                osd.ips = ips
            if osd.port is None:
                changes = True
                osd.port = port
            if changes is True:
                AlbaMigrationController._logger.info(
                    'Updating OSD with ID {0} with IPS {1} and port {2}'.
                    format(osd.osd_id, ips, port))
                osd.save()

        ###################################################
        # Read preference for GLOBAL ALBA Backends (1.10.3)  (https://github.com/openvstorage/framework-alba-plugin/issues/452)
        if Configuration.get(key='/ovs/framework/migration|read_preference',
                             default=False) is False:
            try:
                name_backend_map = dict((alba_backend.name, alba_backend)
                                        for alba_backend in alba_backends)
                for alba_node in AlbaNodeList.get_albanodes():
                    AlbaMigrationController._logger.info(
                        'Processing maintenance services running on ALBA Node {0} with ID {1}'
                        .format(alba_node.ip, alba_node.node_id))
                    alba_node.invalidate_dynamics('maintenance_services')
                    for alba_backend_name, services in alba_node.maintenance_services.iteritems(
                    ):
                        if alba_backend_name not in name_backend_map:
                            AlbaMigrationController._logger.error(
                                'ALBA Node {0} has services for an ALBA Backend {1} which is not modelled'
                                .format(alba_node.ip, alba_backend_name))
                            continue

                        alba_backend = name_backend_map[alba_backend_name]
                        AlbaMigrationController._logger.info(
                            'Processing {0} ALBA Backend {1} with GUID {2}'.
                            format(alba_backend.scaling, alba_backend.name,
                                   alba_backend.guid))
                        if alba_backend.scaling == alba_backend.SCALINGS.LOCAL:
                            read_preferences = [alba_node.node_id]
                        else:
                            read_preferences = AlbaController.get_read_preferences_for_global_backend(
                                alba_backend=alba_backend,
                                alba_node_id=alba_node.node_id,
                                read_preferences=[])

                        for service_name, _ in services:
                            AlbaMigrationController._logger.info(
                                'Processing service {0}'.format(service_name))
                            old_config_key = '/ovs/alba/backends/{0}/maintenance/config'.format(
                                alba_backend.guid)
                            new_config_key = '/ovs/alba/backends/{0}/maintenance/{1}/config'.format(
                                alba_backend.guid, service_name)
                            if Configuration.exists(key=old_config_key):
                                new_config = Configuration.get(
                                    key=old_config_key)
                                new_config[
                                    'read_preference'] = read_preferences
                                Configuration.set(key=new_config_key,
                                                  value=new_config)
                for alba_backend in alba_backends:
                    Configuration.delete(
                        key='/ovs/alba/backends/{0}/maintenance/config'.format(
                            alba_backend.guid))
                AlbaController.checkup_maintenance_agents.delay()

                Configuration.set(
                    key='/ovs/framework/migration|read_preference', value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating read preferences for ALBA Backends failed')

        #######################################################
        # Storing actual package name in version files (1.11.0) (https://github.com/openvstorage/framework/issues/1876)
        changed_clients = set()
        storagerouters = StorageRouterList.get_storagerouters()
        if Configuration.get(
                key=
                '/ovs/framework/migration|actual_package_name_in_version_file_alba',
                default=False) is False:
            try:
                service_manager = ServiceFactory.get_manager()
                alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(
                    component=PackageFactory.COMP_ALBA)
                for storagerouter in storagerouters:
                    try:
                        root_client = SSHClient(
                            endpoint=storagerouter.ip, username='******'
                        )  # Use '.ip' instead of StorageRouter object because this code is executed during post-update at which point the heartbeat has not been updated for some time
                    except UnableToConnectException:
                        AlbaMigrationController._logger.exception(
                            'Updating actual package name for version files failed on StorageRouter {0}'
                            .format(storagerouter.ip))
                        continue

                    for file_name in root_client.file_list(
                            directory=ServiceFactory.RUN_FILE_DIR):
                        if not file_name.endswith('.version'):
                            continue
                        file_path = '{0}/{1}'.format(
                            ServiceFactory.RUN_FILE_DIR, file_name)
                        contents = root_client.file_read(filename=file_path)
                        if alba_pkg_name == PackageFactory.PKG_ALBA_EE and '{0}='.format(
                                PackageFactory.PKG_ALBA) in contents:
                            # Rewrite the version file in the RUN_FILE_DIR
                            contents = contents.replace(
                                PackageFactory.PKG_ALBA,
                                PackageFactory.PKG_ALBA_EE)
                            root_client.file_write(filename=file_path,
                                                   contents=contents)

                            # Regenerate the service and update the EXTRA_VERSION_CMD in the configuration management
                            service_name = file_name.split('.')[0]
                            service_config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(
                                storagerouter.machine_id, service_name)
                            if Configuration.exists(key=service_config_key):
                                service_config = Configuration.get(
                                    key=service_config_key)
                                if 'EXTRA_VERSION_CMD' in service_config:
                                    service_config[
                                        'EXTRA_VERSION_CMD'] = '{0}=`{1}`'.format(
                                            alba_pkg_name, alba_version_cmd)
                                    Configuration.set(key=service_config_key,
                                                      value=service_config)
                                    service_manager.regenerate_service(
                                        name='ovs-arakoon',
                                        client=root_client,
                                        target_name='ovs-{0}'.format(
                                            service_name)
                                    )  # Leave out .version
                                    changed_clients.add(root_client)
                Configuration.set(
                    key=
                    '/ovs/framework/migration|actual_package_name_in_version_file_alba',
                    value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating actual package name for version files failed')

        for root_client in changed_clients:
            try:
                root_client.run(['systemctl', 'daemon-reload'])
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Executing command "systemctl daemon-reload" failed')

        ####################################
        # Fix for migration version (1.11.0)
        # Previous code could potentially store a higher version number in the config management than the actual version number
        if Configuration.get(
                key='/ovs/framework/migration|alba_migration_version_fix',
                default=False) is False:
            try:
                for storagerouter in storagerouters:
                    config_key = '/ovs/framework/hosts/{0}/versions'.format(
                        storagerouter.machine_id)
                    if Configuration.exists(key=config_key):
                        versions = Configuration.get(key=config_key)
                        if versions.get(PackageFactory.COMP_MIGRATION_ALBA,
                                        0) > ExtensionMigrator.THIS_VERSION:
                            versions[
                                PackageFactory.
                                COMP_MIGRATION_ALBA] = ExtensionMigrator.THIS_VERSION
                            Configuration.set(key=config_key, value=versions)
                Configuration.set(
                    key='/ovs/framework/migration|alba_migration_version_fix',
                    value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating migration version failed')

        ####################################
        # Enable auto-cleanup
        migration_auto_cleanup_key = '/ovs/framework/migration|alba_auto_cleanup'
        if Configuration.get(key=migration_auto_cleanup_key,
                             default=False) is False:
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    storagerouter.invalidate_dynamics(
                        'features')  # New feature was added
                errors = []
                for alba_backend in AlbaBackendList.get_albabackends():
                    try:
                        AlbaController.set_auto_cleanup(alba_backend.guid)
                    except Exception as ex:
                        AlbaMigrationController._logger.exception(
                            'Failed to set the auto-cleanup for ALBA Backend {0}'
                            .format(alba_backend.name))
                        errors.append(ex)
                if len(errors) == 0:
                    Configuration.set(key=migration_auto_cleanup_key,
                                      value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating auto cleanup failed')

        ####################################
        # Change cache eviction
        migration_random_eviction_key = '/ovs/framework/migration|alba_cache_eviction_random'
        if Configuration.get(key=migration_random_eviction_key,
                             default=False) is False:
            try:
                errors = []
                for alba_backend in AlbaBackendList.get_albabackends():
                    try:
                        AlbaController.set_cache_eviction(alba_backend.guid)
                    except Exception as ex:
                        AlbaMigrationController._logger.exception(
                            'Failed to set the cache eviction for ALBA Backend {0}'
                            .format(alba_backend.name))
                        errors.append(ex)
                if len(errors) == 0:
                    Configuration.set(key=migration_random_eviction_key,
                                      value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating cache eviction failed')

        ###################################################
        # Sync all disks and apply the backend role. Backend role was removed with the AD (since 1.10)
        albanode_backend_role_sync_key = '/ovs/framework/migration|albanode_backend_role_sync'
        if not Configuration.get(key=albanode_backend_role_sync_key,
                                 default=False):
            try:
                errors = []
                for alba_node in AlbaNodeList.get_albanodes():
                    try:
                        if not alba_node.storagerouter:
                            continue
                        stack = alba_node.client.get_stack()  # type: dict
                        for slot_id, slot_information in stack.iteritems():
                            osds = slot_information.get('osds',
                                                        {})  # type: dict
                            slot_aliases = slot_information.get(
                                'aliases', [])  # type: list
                            if not osds:  # No osds means no partition was made
                                continue
                            # Sync to add all potential partitions that will need a backend role
                            DiskController.sync_with_reality(
                                storagerouter_guid=alba_node.storagerouter_guid
                            )
                            for disk in alba_node.storagerouter.disks:
                                if set(disk.aliases).intersection(
                                        set(slot_aliases)):
                                    partition = disk.partitions[0]
                                    if DiskPartition.ROLES.BACKEND not in partition.roles:
                                        partition.roles.append(
                                            DiskPartition.ROLES.BACKEND)
                                        partition.save()
                    except Exception as ex:
                        AlbaMigrationController._logger.exception(
                            'Syncing for storagerouter/albanode {0} failed'.
                            format(alba_node.storagerouter.ip))
                        errors.append(ex)
                if not errors:
                    Configuration.set(key=albanode_backend_role_sync_key,
                                      value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Syncing up the disks for backend roles failed')

        AlbaMigrationController._logger.info('Finished out of band migrations')
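
Every block in migrate() follows the same idempotency pattern: read a boolean flag from the configuration management, run the migration only while that flag is still False, and flip it to True only when the work finished without errors, so a failed run is retried on the next invocation. Below is a stripped-down sketch of that pattern; the flag key, the in-memory Configuration stand-in and do_migration are illustrative, not part of the codebase.

class Configuration(object):
    """In-memory stand-in for the configuration management used above."""
    _store = {}

    @classmethod
    def get(cls, key, default=None):
        return cls._store.get(key, default)

    @classmethod
    def set(cls, key, value):
        cls._store[key] = value


def run_once(flag_key, do_migration):
    """Run do_migration at most once; a raised exception leaves the flag untouched so it is retried later."""
    if Configuration.get(key=flag_key, default=False) is False:
        try:
            do_migration()
            Configuration.set(key=flag_key, value=True)  # Only mark as done when nothing failed
        except Exception:
            pass  # The real code logs the exception here


run_once('/ovs/framework/migration|example_flag', lambda: None)
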
Example #20
    def add_preset(alba_backend_guid, name, compression, policies, encryption, fragment_size=None):
        """
        Adds a preset to Alba
        :param alba_backend_guid: Guid of the ALBA backend
        :type alba_backend_guid: str
        :param name: Name of the preset
        :type name: str
        :param compression: Compression type for the preset (none | snappy | bz2)
        :type compression: str
        :param policies: Policies for the preset
        :type policies: list
        :param encryption: Encryption for the preset (none | aes-cbc-256 | aes-ctr-256)
        :type encryption: str
        :param fragment_size: Size of a fragment in bytes (e.g. 1048576)
        :type fragment_size: int
        :return: None
        """
        # VALIDATIONS
        if not re.match(Toolbox.regex_preset, name):
            raise ValueError('Invalid preset name specified')

        compression_options = ['snappy', 'bz2', 'none']
        if compression not in compression_options:
            raise ValueError('Invalid compression format specified, please choose from: "{0}"'.format('", "'.join(compression_options)))

        encryption_options = ['aes-cbc-256', 'aes-ctr-256', 'none']
        if encryption not in encryption_options:
            raise ValueError('Invalid encryption format specified, please choose from: "{0}"'.format('", "'.join(encryption_options)))

        if fragment_size is not None and (not isinstance(fragment_size, int) or not 16 <= fragment_size <= 1024 ** 3):
            raise ValueError('Fragment size should be an integer between 16 bytes and 1 GiB')

        AlbaPresetController._validate_policies_param(policies=policies)

        alba_backend = AlbaBackend(alba_backend_guid)
        if name in [preset['name'] for preset in alba_backend.presets]:
            raise RuntimeError('Preset with name {0} already exists'.format(name))

        # ADD PRESET
        preset = {'compression': compression,
                  'object_checksum': {'default': ['crc-32c'],
                                      'verify_upload': True,
                                      'allowed': [['none'], ['sha-1'], ['crc-32c']]},
                  'osds': ['all'],
                  'fragment_size': 16 * 1024 ** 2 if fragment_size is None else int(fragment_size),
                  'policies': policies,
                  'fragment_checksum': ['crc-32c'],
                  'fragment_encryption': ['none'],
                  'in_use': False,
                  'name': name}

        # Generate encryption key
        temp_key_file = None
        if encryption != 'none':
            encryption_key = ''.join(chr(random.randint(32, 126)) for _ in range(32))
            temp_key_file = tempfile.mktemp()
            with open(temp_key_file, 'wb') as temp_file:
                temp_file.write(encryption_key)
                temp_file.flush()
            preset['fragment_encryption'] = ['{0}'.format(encryption), '{0}'.format(temp_key_file)]

        # Dump preset content on filesystem
        config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend)))
        temp_config_file = tempfile.mktemp()
        with open(temp_config_file, 'wb') as data_file:
            data_file.write(json.dumps(preset))
            data_file.flush()

        # Create preset
        AlbaPresetController._logger.debug('Adding preset {0} with compression {1} and policies {2}'.format(name, compression, policies))
        AlbaCLI.run(command='create-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name])

        # Cleanup
        alba_backend.invalidate_dynamics()
        for filename in [temp_key_file, temp_config_file]:
            if filename and os.path.exists(filename) and os.path.isfile(filename):
                os.remove(filename)
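
A hedged usage sketch for add_preset follows: the backend GUID is a placeholder, and the policy layout (four-integer lists, commonly read as [k, m, c, x]) is an assumption validated by _validate_policies_param, which is not shown here. A deployed OpenvStorage environment with the ALBA plugin is assumed.

# Placeholder GUID and example values only
AlbaPresetController.add_preset(alba_backend_guid='00000000-0000-0000-0000-000000000000',
                                name='mypreset',
                                compression='snappy',              # one of snappy | bz2 | none
                                policies=[[2, 2, 3, 4]],           # assumed [k, m, c, x] layout
                                encryption='none',                 # or aes-cbc-256 / aes-ctr-256
                                fragment_size=4 * 1024 ** 2)       # 4 MiB instead of the 16 MiB default
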
Example #21
    def fill_slots(node_guid, osd_information, metadata=None):
        """
        Creates 1 or more new OSDs
        :param node_guid: Guid of the node to which the disks belong
        :type node_guid: str
        :param osd_information: Information about the amount of OSDs to add to each Slot
        :type osd_information: list
        :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information)
        :type metadata: dict
        :return: None
        :rtype: NoneType
        """
        metadata_type_validation = {
            'integer': (int, None),
            'osd_type': (str, AlbaOSD.OSD_TYPES.keys()),
            'ip': (str, ExtensionsToolbox.regex_ip),
            'port': (int, {
                'min': 1,
                'max': 65535
            })
        }
        node = AlbaNode(node_guid)
        required_params = {'slot_id': (str, None)}
        can_be_filled = False
        for flow in ['fill', 'fill_add']:
            if node.node_metadata[flow] is False:
                continue
            can_be_filled = True
            if flow == 'fill_add':
                required_params['alba_backend_guid'] = (str, None)
            for key, mtype in node.node_metadata['{0}_metadata'.format(
                    flow)].iteritems():
                if mtype in metadata_type_validation:
                    required_params[key] = metadata_type_validation[mtype]
        if can_be_filled is False:
            raise ValueError('The given node does not support filling slots')

        validation_reasons = []
        for osd_info in osd_information:  # type: dict
            try:
                ExtensionsToolbox.verify_required_params(
                    required_params=required_params, actual_params=osd_info)
            except RuntimeError as ex:
                validation_reasons.append(str(ex))
        if len(validation_reasons) > 0:
            raise ValueError('Missing required parameters:\n* {0}'.format(
                '\n* '.join(validation_reasons)))

        for osd_info in osd_information:
            if node.node_metadata['fill'] is True:
                # Only filling is required
                AlbaNodeController._fill_slot(
                    node, osd_info['slot_id'],
                    dict((key, osd_info[key])
                         for key in node.node_metadata['fill_metadata']))
            elif node.node_metadata['fill_add'] is True:
                # Fill the slot
                created_osds = AlbaNodeController._fill_slot(
                    node, osd_info['slot_id'],
                    dict((key, osd_info[key])
                         for key in node.node_metadata['fill_add_metadata']))
                # And add/claim the OSD
                if node.type == AlbaNode.NODE_TYPES.S3:
                    # The S3 manager returns the information about the osd when filling it
                    for created_osd_info in created_osds:
                        osd_info.update(
                            created_osd_info
                        )  # Add additional information about the osd
                        AlbaController.add_osds(
                            alba_backend_guid=osd_info['alba_backend_guid'],
                            osds=[osd_info],
                            alba_node_guid=node_guid,
                            metadata=metadata)
                else:
                    AlbaController.add_osds(
                        alba_backend_guid=osd_info['alba_backend_guid'],
                        osds=[osd_info],
                        alba_node_guid=node_guid,
                        metadata=metadata)
        node.invalidate_dynamics('stack')
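
The shape of osd_information depends on the node's fill/fill_add metadata. As an illustration only: apart from slot_id (always required) and alba_backend_guid (required for the fill_add flow), the key names below ('count', 'osd_type') are hypothetical examples of what a node's fill_add_metadata could declare through the metadata_type_validation mapping above.

# Hypothetical payload for a node that supports the fill_add flow
osd_information = [{'slot_id': 'ata-QEMU_HARDDISK_QM00001',                           # always required
                    'alba_backend_guid': '00000000-0000-0000-0000-000000000000',      # required for fill_add
                    'count': 4,                                                       # 'integer' metadata type
                    'osd_type': 'ASD'},                                               # 'osd_type' metadata type
                   {'slot_id': 'ata-QEMU_HARDDISK_QM00002',
                    'alba_backend_guid': '00000000-0000-0000-0000-000000000000',
                    'count': 2,
                    'osd_type': 'ASD'}]
# AlbaNodeController.fill_slots(node_guid=node_guid, osd_information=osd_information)
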
Example #22
    def fill_slots(node_cluster_guid,
                   node_guid,
                   osd_information,
                   metadata=None):
        # type: (str, str, List[Dict[str, Any]], Optional[Dict[str, Any]]) -> None
        """
        Creates 1 or more new OSDs
        :param node_cluster_guid: Guid of the node cluster to which the disks belong
        :type node_cluster_guid: basestring
        :param node_guid: Guid of the AlbaNode to act as the 'active' side
        :type node_guid: basestring
        :param osd_information: Information about the amount of OSDs to add to each Slot
        :type osd_information: list
        :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information)
        :type metadata: dict
        :return: None
        :rtype: NoneType
        """
        metadata_type_validation = {
            'integer': (int, None),
            'osd_type': (str, AlbaOSD.OSD_TYPES.keys()),
            'ip': (str, ExtensionsToolbox.regex_ip),
            'port': (int, {
                'min': 1,
                'max': 65535
            })
        }
        node_cluster = AlbaNodeCluster(node_cluster_guid)
        # Check for the active side if it's part of the cluster
        active_node = AlbaNode(node_guid)
        if active_node not in node_cluster.alba_nodes:
            raise ValueError(
                'The requested active AlbaNode is not part of AlbaNodeCluster {0}'
                .format(node_cluster.guid))
        required_params = {'slot_id': (str, None)}
        can_be_filled = False
        for flow in ['fill', 'fill_add']:
            if node_cluster.cluster_metadata[flow] is False:
                continue
            can_be_filled = True
            if flow == 'fill_add':
                required_params['alba_backend_guid'] = (str, None)
            for key, mtype in node_cluster.cluster_metadata[
                    '{0}_metadata'.format(flow)].iteritems():
                if mtype in metadata_type_validation:
                    required_params[key] = metadata_type_validation[mtype]
        if can_be_filled is False:
            raise ValueError(
                'The given node cluster does not support filling slots')

        validation_reasons = []
        for slot_info in osd_information:
            try:
                ExtensionsToolbox.verify_required_params(
                    required_params=required_params, actual_params=slot_info)
            except RuntimeError as ex:
                validation_reasons.append(str(ex))
        if len(validation_reasons) > 0:
            raise ValueError('Missing required parameters:\n* {0}'.format(
                '\n* '.join(validation_reasons)))

        for slot_info in osd_information:
            if node_cluster.cluster_metadata['fill'] is True:
                # Only filling is required
                active_node.client.fill_slot(
                    slot_id=slot_info['slot_id'],
                    extra=dict((key, slot_info[key]) for key in
                               node_cluster.cluster_metadata['fill_metadata']))
            elif node_cluster.cluster_metadata['fill_add'] is True:
                # Fill the slot
                active_node.client.fill_slot(
                    slot_id=slot_info['slot_id'],
                    extra=dict(
                        (key, slot_info[key]) for key in
                        node_cluster.cluster_metadata['fill_add_metadata']))

                # And add/claim the OSD
                AlbaController.add_osds(
                    alba_backend_guid=slot_info['alba_backend_guid'],
                    osds=[slot_info],
                    alba_node_guid=node_guid,
                    metadata=metadata)
        # Invalidate the stack and sync towards all passive sides
        active_node.invalidate_dynamics('stack')
        for node in node_cluster.alba_nodes:
            if node != active_node:
                try:
                    node.client.sync_stack(active_node.stack)
                except Exception:
                    AlbaNodeClusterController._logger.exception(
                        'Error while syncing stacks to the passive side')
        node_cluster.invalidate_dynamics('stack')
Example #23
    def remove_asd(node_guid, asd_id, expected_safety):
        """
        Removes an ASD
        :param node_guid: Guid of the node to remove an ASD from
        :type node_guid: str
        :param asd_id: ID of the ASD to remove
        :type asd_id: str
        :param expected_safety: Expected safety after having removed the ASD
        :type expected_safety: dict or None
        :return: Aliases of the disk on which the ASD was removed
        :rtype: list
        """
        node = AlbaNode(node_guid)
        AlbaNodeController._logger.debug('Removing ASD {0} at node {1}'.format(asd_id, node.ip))
        model_osd = None
        for disk in node.disks:
            for asd in disk.osds:
                if asd.osd_id == asd_id:
                    model_osd = asd
                    break
            if model_osd is not None:
                break
        if model_osd is not None:
            alba_backend = model_osd.alba_backend
        else:
            alba_backend = None

        asds = {}
        try:
            asds = node.client.get_asds()
        except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
            AlbaNodeController._logger.warning('Could not connect to node {0} to validate ASD'.format(node.guid))
        partition_alias = None
        for alias, asd_ids in asds.iteritems():
            if asd_id in asd_ids:
                partition_alias = alias
                break

        if alba_backend is not None:
            if expected_safety is None:
                AlbaNodeController._logger.warning('Skipping safety check for ASD {0} on backend {1} - this is dangerous'.format(asd_id, alba_backend.guid))
            else:
                final_safety = AlbaController.calculate_safety(alba_backend_guid=alba_backend.guid,
                                                               removal_osd_ids=[asd_id])
                safety_lost = final_safety['lost']
                safety_crit = final_safety['critical']
                if (safety_crit != 0 or safety_lost != 0) and (safety_crit != expected_safety['critical'] or safety_lost != expected_safety['lost']):
                    raise RuntimeError('Cannot remove ASD {0} as the current safety is not as expected ({1} vs {2})'.format(asd_id, final_safety, expected_safety))
                AlbaNodeController._logger.debug('Safety OK for ASD {0} on backend {1}'.format(asd_id, alba_backend.guid))
            AlbaNodeController._logger.debug('Purging ASD {0} on backend {1}'.format(asd_id, alba_backend.guid))
            AlbaController.remove_units(alba_backend_guid=alba_backend.guid,
                                        osd_ids=[asd_id])
        else:
            AlbaNodeController._logger.warning('Could not match ASD {0} to any backend. Cannot purge'.format(asd_id))

        disk_data = None
        if partition_alias is not None:
            AlbaNodeController._logger.debug('Removing ASD {0} from disk {1}'.format(asd_id, partition_alias))
            for device_info in node.client.get_disks().itervalues():
                if partition_alias in device_info['partition_aliases']:
                    disk_data = device_info
                    result = node.client.delete_asd(disk_id=device_info['aliases'][0].split('/')[-1],
                                                    asd_id=asd_id)
                    if result['_success'] is False:
                        raise RuntimeError('Error removing ASD: {0}'.format(result['_error']))
            if disk_data is None:
                raise RuntimeError('Failed to find disk for partition with alias {0}'.format(partition_alias))
        else:
            AlbaNodeController._logger.warning('Could not remove ASD {0} from remote node (node down)'.format(asd_id))

        if Configuration.exists(AlbaNodeController.ASD_CONFIG.format(asd_id), raw=True):
            Configuration.delete(AlbaNodeController.ASD_CONFIG_DIR.format(asd_id), raw=True)

        if model_osd is not None:
            model_osd.delete()
        if alba_backend is not None:
            alba_backend.invalidate_dynamics()
            alba_backend.backend.invalidate_dynamics()
        if node.storagerouter is not None:
            DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)

        return [] if disk_data is None else disk_data.get('aliases', [])
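
remove_asd re-validates the safety it calculates against the expected_safety supplied by the caller, so a typical calling pattern computes that expectation first with the same AlbaController.calculate_safety call used inside the method. A hedged sketch; the GUIDs and ASD id are placeholders and a running OpenvStorage setup is assumed.

# Placeholders for illustration only
node_guid = '11111111-1111-1111-1111-111111111111'
alba_backend_guid = '22222222-2222-2222-2222-222222222222'
asd_id = 'example-asd-id'

# Ask ALBA what the safety will look like once this ASD is gone ...
expected_safety = AlbaController.calculate_safety(alba_backend_guid=alba_backend_guid,
                                                  removal_osd_ids=[asd_id])
# ... and only then remove it; remove_asd raises when the recalculated safety deviates from this expectation
AlbaNodeController.remove_asd(node_guid=node_guid,
                              asd_id=asd_id,
                              expected_safety=expected_safety)
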
Example #24
    def test_nsm_checkup_external(self):
        """
        Validates whether the NSM checkup works for externally managed Arakoon clusters
        """
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 1)
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.maxload', 10)

        structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2, 3]})
        alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})

        alba_backend = alba_structure['alba_backends'][1]
        storagerouter_1 = structure['storagerouters'][1]
        storagerouter_2 = structure['storagerouters'][2]

        # Validate some logic for externally managed arakoons during NSM checkup
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(external_nsm_cluster_names=['test'])  # No ALBA Backend specified
        self.assertEqual(first=str(raise_info.exception), second='Additional NSMs can only be configured for a specific ALBA Backend')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, min_internal_nsms=2, external_nsm_cluster_names=['test'])
        self.assertEqual(first=str(raise_info.exception), second="'min_internal_nsms' and 'external_nsm_cluster_names' are mutually exclusive")
        with self.assertRaises(ValueError) as raise_info:
            # noinspection PyTypeChecker
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names={})  # NSM cluster names must be a list
        self.assertEqual(first=str(raise_info.exception), second="'external_nsm_cluster_names' must be of type 'list'")
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=['non-existing-cluster'])  # non-existing cluster names should raise
        self.assertEqual(first=str(raise_info.exception), second="Arakoon cluster with name non-existing-cluster does not exist")

        # Create an external ABM and NSM Arakoon cluster
        external_abm_1 = 'backend_1-abm'
        external_nsm_1 = 'backend_1-nsm_0'
        external_nsm_2 = 'backend_1-nsm_1'
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(cluster_type=cluster_type, ip=storagerouter_1.ip, base_dir='/tmp', internal=False)
            arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip, base_dir='/tmp')
            arakoon_installer.start_cluster()
            arakoon_installer.unclaim_cluster()
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': False},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))

        # Let the 'add_cluster` claim the externally managed clusters and model the services
        Logger._logs = {}
        AlbaController.add_cluster(alba_backend_guid=alba_backend.guid,
                                   abm_cluster=external_abm_1,
                                   nsm_clusters=[external_nsm_1])  # Only claim external_nsm_1
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': False if cluster_name == external_nsm_2 else True},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))
        log_found = False
        for log_record in Logger._logs.get('lib', []):
            if 'NSM load OK' in log_record:
                log_found = True
                break
        self.assertTrue(expr=log_found)
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config'],
                                                                           ['add_nsm_host', 'backend_1-nsm_0'],
                                                                           ['update_maintenance_config','--eviction-type-random'],
                                                                           ['update_maintenance_config','enable-auto-cleanup-deleted-namespaces-days']])

        # Add cluster already invokes a NSM checkup, so nothing should have changed
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()
        self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

        # Overload the only NSM and run NSM checkup. This should log a critical message, but change nothing
        VirtualAlbaBackend.data['backend_1-abm']['nsms'][0]['namespaces_count'] = 25
        Logger._logs = {}
        AlbaArakoonController.nsm_checkup()
        log_found = False
        for log_record in Logger._logs.get('lib', []):
            if 'All NSM clusters are overloaded' in log_record:
                log_found = True
                break
        self.assertTrue(expr=log_found)
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

        # Validate a maximum of 50 NSMs can be deployed
        current_nsms = [nsm_cluster.number for nsm_cluster in alba_backend.nsm_clusters]
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_nsm_clusters': [(1, 50)]},  # (<alba_backend_id>, <amount_of_nsm_clusters>)
            previous_structure=alba_structure
        )
        # Try to add 1 additional NSM
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
        self.assertEqual(first=str(raise_info.exception), second='The maximum of 50 NSM Arakoon clusters will be exceeded. Amount of clusters that can be deployed for this ALBA Backend: 0')

        # Remove the unused NSM clusters again
        for nsm_cluster in alba_structure['alba_nsm_clusters'][1][len(current_nsms):]:
            for nsm_service in nsm_cluster.nsm_services:
                nsm_service.delete()
                nsm_service.service.delete()
            nsm_cluster.delete()

        # Try to add a previously claimed NSM cluster
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_1])  # The provided cluster_name to claim has already been claimed
        self.assertEqual(first=str(raise_info.exception), second='Some of the provided cluster_names have already been claimed before')

        # Add a 2nd NSM cluster
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=2, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[1].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[1].nsm_services[0].service.storagerouter)
        self.assertListEqual(list1=[['add_nsm_host', 'backend_1-nsm_1']], list2=VirtualAlbaBackend.run_log['backend_1-abm'])
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': True},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))