def setup(**kwargs):
    """
    Execute several actions before starting a new UnitTest
    :param kwargs: Additional key word arguments
    :type kwargs: dict
    """
    DalHelper.setup(**kwargs)
    # noinspection PyProtectedMember
    ManagerClientMockup.clean_data()
    # noinspection PyProtectedMember
    VirtualAlbaBackend.clean_data()
    # noinspection PyProtectedMember
    AlbaController._add_base_configuration()

def claim_asds(alba_backend, nr_of_asds, disk_type=''):
    """
    Claim ASDs on an ALBA Backend, initialising additional disks when required
    :param alba_backend: ALBA Backend to claim ASDs for
    :param nr_of_asds: Total amount of ASDs that should be claimed
    :param disk_type: Type of disks to initialise
    :return: None
    """
    def _wait_for_asd_count_with_status(_alba_backend, _nr_of_asds, status):
        counter = GeneralAlba.ALBA_TIMER / GeneralAlba.ALBA_TIMER_STEP
        asds_with_status = {}
        while counter > 0:
            GeneralAlba.logger.info('counter: {0}'.format(counter))
            _alba_backend.invalidate_dynamics(['local_stack'])
            for node_id in _alba_backend.local_stack:
                for _disk in _alba_backend.local_stack[node_id].values():
                    for _osd_id, _asd in _disk['asds'].iteritems():
                        if _asd['status'] == status:
                            asds_with_status[_osd_id] = _disk.get('guid')
            GeneralAlba.logger.info('looking for {0} asds with status {1}: {2}'.format(_nr_of_asds, status, asds_with_status))
            if len(asds_with_status) >= _nr_of_asds:
                break
            counter -= 1
            time.sleep(GeneralAlba.ALBA_TIMER_STEP)
        assert len(asds_with_status) >= _nr_of_asds, \
            "Unable to find {0} asds, only found {1} asds with status: {2}.\n".format(_nr_of_asds, len(asds_with_status), status)
        return asds_with_status

    claimed_asds = []
    alba_backend.invalidate_dynamics(['local_stack'])
    local_stack = alba_backend.local_stack
    for disks in local_stack.values():
        for disk in disks.values():
            for osd_id, asd in disk['asds'].iteritems():
                if asd['status'] == 'claimed':
                    claimed_asds.append(osd_id)
    nr_asds_to_claim = nr_of_asds - len(claimed_asds)
    if nr_asds_to_claim <= 0:
        return True
    # @TODO: Initialize disks should be parameterized to be parallel or sequential with parallel being the default
    GeneralAlba.initialise_disks(alba_backend, nr_asds_to_claim, disk_type)
    claimable_asds = _wait_for_asd_count_with_status(alba_backend, nr_asds_to_claim, 'available')
    GeneralAlba.logger.info('osds: {0}'.format(claimable_asds))
    AlbaController.add_units(alba_backend.guid, claimable_asds)
    _wait_for_asd_count_with_status(alba_backend, nr_of_asds, 'claimed')

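# Usage sketch for claim_asds (illustrative only; the guid and disk type below are placeholder
# assumptions, not values defined in this module):
#     backend = AlbaBackend(some_backend_guid)
#     claim_asds(backend, nr_of_asds=3, disk_type='SATA')
# The call initialises extra disks when fewer than 3 ASDs are claimed and then waits until the
# requested number of ASDs reaches the 'claimed' status.
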
def move_slot(node_guid, slot_id, destination_node_guid):
    """
    Move a slot from one node to another. If the same disk can be accessed, all ASD ownership is moved
    This is a Dual Controller feature
    :param node_guid: Guid of the owner node
    :type node_guid: str
    :param slot_id: Identifier of the slot
    :type slot_id: str
    :param destination_node_guid: Guid of the destination node
    :type destination_node_guid: str
    :return: None
    :rtype: NoneType
    """
    origin_node = AlbaNode(node_guid)
    destination_node = AlbaNode(destination_node_guid)
    # Validation
    if origin_node.alba_node_cluster is None:
        raise ValueError('Node with guid {0} is not part of a cluster'.format(node_guid))
    if origin_node.alba_node_cluster != destination_node.alba_node_cluster:
        raise ValueError('The nodes are not part of the same cluster')
    if slot_id not in origin_node.stack:
        raise ValueError('Slot with ID {0} is not available in the origin node with guid {1}'.format(slot_id, node_guid))
    if slot_id not in destination_node.stack:
        raise ValueError('Slot with ID {0} is not available in the destination node with guid {1}'.format(slot_id, destination_node_guid))
    # Stop the OSDs on the origin
    try:
        origin_node.client.stop_slot(slot_id)
    except:
        AlbaNodeClusterController._logger.exception('Unable to stop the slot')
        raise
    try:
        # Update all references in Alba
        AlbaController.update_osds()
        raise NotImplementedError()
    except:
        raise

def get_maintenance_config(self, albabackend):
    # type: (AlbaBackend) -> dict
    """
    Get the maintenance config for the Backend
    :param albabackend: ALBA Backend to get the maintenance config for
    :type albabackend: ovs.dal.hybrids.albabackend.AlbaBackend
    :return: Dict that represents the config
    :rtype: dict
    """
    return AlbaController.get_maintenance_config(alba_backend_guid=albabackend.guid)

def checkup_nsm_hosts(albabackend_name, amount):
    """
    Checkup the NSM hosts for a certain ALBA Backend
    :param albabackend_name: name of an existing ALBA Backend
    :type albabackend_name: str
    :param amount: minimum amount of NSM hosts for the backend
    :type amount: int
    :return: Result of the NSM checkup
    """
    alba_backend_guid = BackendHelper.get_alba_backend_guid_by_name(albabackend_name)
    return AlbaController.nsm_checkup(backend_guid=alba_backend_guid, min_nsms=int(amount))

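# Usage sketch for checkup_nsm_hosts (illustrative; 'mybackend' is a placeholder name):
#     checkup_nsm_hosts(albabackend_name='mybackend', amount=3)
# This resolves the backend guid by name and asks AlbaController.nsm_checkup to ensure at least
# 3 NSM hosts exist for that backend.
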
def get_maintenance_metadata(self):
    # type: () -> dict
    """
    Return a maintenance layout that the GUI can interpret to create a dynamic form
    :return: Dict with metadata
    :rtype: dict
    """
    metadata = {}
    if AlbaController.can_set_auto_cleanup():
        metadata.update({'edit': True,
                         'edit_metadata': {'auto_cleanup_deleted_namespaces': 'integer'}})
    return metadata

def unclaim_disks(alba_backend):
    """
    Un-claim disks
    :param alba_backend: ALBA backend
    :return: None
    """
    # @TODO: Allow the unclaim of disks to go sequentially or in parallel (parallel should be the default)
    alba_backend.invalidate_dynamics(['local_stack'])
    for disks in alba_backend.local_stack.values():
        for disk_id, disk in disks.iteritems():
            if disk['status'] in ['uninitialized']:
                continue
            asd_node = GeneralAlba.get_node_by_id(disk['node_id'])
            for osd_id in disk['asds'].keys():
                current_safety = AlbaController.calculate_safety(alba_backend.guid, [osd_id])
                data = {'asd_id': osd_id, 'safety': current_safety}
                GeneralAlba.logger.info(GeneralAlba.api.execute_post_action('alba/nodes', asd_node.guid, 'reset_asd', data, wait=True))
            data = {'disk': '/dev/disk/by-id/' + disk_id}
            GeneralAlba.logger.info(GeneralAlba.api.execute_post_action('alba/nodes', asd_node.guid, 'remove_disk', data, wait=True))

def set_maintenance_config(self, albabackend, maintenance_config):
    # type: (AlbaBackend, dict) -> celery.result.AsyncResult
    """
    Set the maintenance config for the Backend
    :param albabackend: ALBA Backend to set the maintenance config for
    :type albabackend: ovs.dal.hybrids.albabackend.AlbaBackend
    :param maintenance_config: Maintenance config as it should be set
    Possible keys:
    - auto_cleanup_deleted_namespaces: Number of days to wait before cleaning up.
      Setting it to 0 disables the delay and always cleans up a namespace right after removing it (int)
    :type maintenance_config: dict
    :return: Asynchronous result of a CeleryTask
    :rtype: celery.result.AsyncResult
    """
    # API implementation can be changed in the future. The whole config is sent through the API but only one setting is used
    days = maintenance_config.get('auto_cleanup_deleted_namespaces')
    if not isinstance(days, int) or days < 0:
        raise HttpNotAcceptableException(error='invalid_data',
                                         error_description="'auto_cleanup_deleted_namespaces' should be a positive integer or 0")
    return AlbaController.set_auto_cleanup(alba_backend_guid=albabackend.guid, days=days)

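# Example payload for set_maintenance_config (illustrative; only the 'auto_cleanup_deleted_namespaces'
# key is currently consumed, as noted in the comment above):
#     maintenance_config = {'auto_cleanup_deleted_namespaces': 30}   # clean up 30 days after removal
#     maintenance_config = {'auto_cleanup_deleted_namespaces': 0}    # clean up immediately after removal
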
def update_preset(alba_backend_guid, name, policies):
    """
    Updates the policies of an existing preset on an ALBA Backend
    :param alba_backend_guid: Guid of the ALBA backend
    :type alba_backend_guid: str
    :param name: Name of the preset to update
    :type name: str
    :param policies: New policy list to be sent to alba
    :type policies: list
    :return: None
    """
    # VALIDATIONS
    AlbaPresetController._validate_policies_param(policies=policies)
    alba_backend = AlbaBackend(alba_backend_guid)
    if name not in [preset['name'] for preset in alba_backend.presets]:
        raise RuntimeError('Could not find a preset with name {0} for ALBA Backend {1}'.format(name, alba_backend.name))

    # UPDATE PRESET
    AlbaPresetController._logger.debug('Updating preset {0} with policies {1}'.format(name, policies))
    config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend)))
    temp_config_file = tempfile.mktemp()
    with open(temp_config_file, 'wb') as data_file:
        data_file.write(json.dumps({'policies': policies}))
        data_file.flush()
    AlbaCLI.run(command='update-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name])
    alba_backend.invalidate_dynamics()
    os.remove(temp_config_file)

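# Example policies argument for update_preset (illustrative values only; the list-of-lists shape is
# what _validate_policies_param checks, and the (k, m, c, x) interpretation of each entry is an
# assumption about ALBA preset policies, not something defined in this module):
#     update_preset(alba_backend_guid, name='default', policies=[[5, 4, 8, 3], [2, 2, 3, 4]])
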
def delete_preset(alba_backend_guid, name):
    """
    Deletes a preset from the ALBA backend
    :param alba_backend_guid: Guid of the ALBA backend
    :type alba_backend_guid: str
    :param name: Name of the preset
    :type name: str
    :return: None
    """
    # VALIDATIONS
    alba_backend = AlbaBackend(alba_backend_guid)
    preset_default_map = dict((preset['name'], preset['is_default']) for preset in alba_backend.presets)
    if name not in preset_default_map:
        AlbaPresetController._logger.warning('Preset with name {0} for ALBA Backend {1} could not be found, so not deleting'.format(name, alba_backend.name))
        return

    if preset_default_map[name] is True:
        raise RuntimeError('Cannot delete the default preset')

    # DELETE PRESET
    AlbaPresetController._logger.debug('Deleting preset {0}'.format(name))
    config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend)))
    AlbaCLI.run(command='delete-preset', config=config, extra_params=[name])
    alba_backend.invalidate_dynamics()

def get_update_information_alba_plugin(information):
    """
    Called when the 'Update' button in the GUI is pressed
    This call collects additional information about the packages which can be updated
    Eg:
        * Downtime for Arakoons
        * Downtime for StorageDrivers
        * Prerequisites that haven't been met
        * Services which will be stopped during update
        * Services which will be restarted after update
    """
    # Verify arakoon info
    arakoon_ovs_info = {'down': False, 'name': None, 'internal': False}
    arakoon_cacc_info = {'down': False, 'name': None, 'internal': False}
    for cluster in ['cacc', 'ovsdb']:
        cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
        if cluster_name is None:
            continue
        if cluster == 'cacc':
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
        else:
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
        if arakoon_metadata['internal'] is True:
            config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
            config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
            if cluster == 'ovsdb':
                arakoon_ovs_info['down'] = len(config.nodes) < 3
                arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                arakoon_ovs_info['internal'] = True
            else:
                arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                arakoon_cacc_info['internal'] = True

    # Verify StorageRouter downtime
    fwk_prerequisites = []
    all_storagerouters = StorageRouterList.get_storagerouters()
    for storagerouter in all_storagerouters:
        try:
            SSHClient(endpoint=storagerouter, username='******')
        except UnableToConnectException:
            fwk_prerequisites.append(['node_down', storagerouter.name])

    # Verify ALBA node responsiveness
    alba_prerequisites = []
    for alba_node in AlbaNodeList.get_albanodes():
        try:
            alba_node.client.get_metadata()
        except Exception:
            alba_prerequisites.append(['alba_node_unresponsive', alba_node.ip])

    for key in ['framework', 'alba']:
        if key not in information:
            information[key] = {'packages': {},
                                'downtime': [],
                                'prerequisites': fwk_prerequisites if key == 'framework' else alba_prerequisites,
                                'services_stop_start': set(),
                                'services_post_update': set()}

        for storagerouter in StorageRouterList.get_storagerouters():
            if key not in storagerouter.package_information:
                continue

            # Retrieve Arakoon issues
            arakoon_downtime = []
            arakoon_services = []
            for service in storagerouter.services:
                if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                    continue
                if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                    cluster_name = AlbaController.get_abm_cluster_name(alba_backend=service.abm_service.alba_backend)
                else:
                    cluster_name = AlbaController.get_nsm_cluster_name(alba_backend=service.nsm_service.alba_backend, number=service.nsm_service.number)
                if Configuration.exists('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) is False:
                    continue
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                if arakoon_metadata['internal'] is True:
                    arakoon_services.append('ovs-{0}'.format(service.name))
                    config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                    config.load_config()
                    if len(config.nodes) < 3:
                        if service.type.name == ServiceType.SERVICE_TYPES.NS_MGR:
                            arakoon_downtime.append(['backend', service.nsm_service.alba_backend.name])
                        else:
                            arakoon_downtime.append(['backend', service.abm_service.alba_backend.name])

            for package_name, package_info in storagerouter.package_information[key].iteritems():
                if package_name not in AlbaUpdateController.alba_plugin_packages:
                    continue  # Only gather information for the core packages

                information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                if package_name not in information[key]['packages']:
                    information[key]['packages'][package_name] = {}
                information[key]['packages'][package_name].update(package_info)

                if package_name == 'openvstorage-backend':
                    if ['gui', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['gui', None])
                    if ['api', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['api', None])
                    information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                elif package_name == 'alba':
                    for down in arakoon_downtime:
                        if down not in information[key]['downtime']:
                            information[key]['downtime'].append(down)
                    information[key]['services_post_update'].update(arakoon_services)
                elif package_name == 'arakoon':
                    if key == 'framework':
                        framework_arakoons = set()
                        if arakoon_ovs_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                        if arakoon_cacc_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))
                        information[key]['services_post_update'].update(framework_arakoons)
                        if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['ovsdb', None])
                    else:
                        for down in arakoon_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(arakoon_services)

        for alba_node in AlbaNodeList.get_albanodes():
            for package_name, package_info in alba_node.package_information.get(key, {}).iteritems():
                if package_name not in AlbaUpdateController.sdm_packages:
                    continue  # Only gather information for the SDM packages

                information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                if package_name not in information[key]['packages']:
                    information[key]['packages'][package_name] = {}
                information[key]['packages'][package_name].update(package_info)
    return information

def test_maintenance_agents_for_local_backends_w_layout(self):
    """
    Validates the checkup maintenance agents for LOCAL ALBA Backends with a specific layout specified
    Additionally test whether at least 1 maintenance agent gets deployed even though none of the ALBA Nodes is linked to the ALBA Backend
    """
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_nodes': [1, 2, 3, 4],
                                                                  'alba_backends': [[1, 'LOCAL']],
                                                                  'alba_abm_clusters': [1]})
    alba_backend = alba_structure['alba_backends'][1]
    config_key = AlbaController.AGENTS_LAYOUT_CONFIG_KEY.format(alba_backend.guid)
    unknown_node_name = 'non-existing-node'

    # Mock some return values for some of the calls performed by `checkup_maintenance_agents`
    for alba_node in alba_structure['alba_nodes'].itervalues():
        ManagerClientMockup.test_results[alba_node].update({'get_stack': {},
                                                            'get_service_status': {'status': [None, 'active']},
                                                            'add_maintenance_service': '',
                                                            'remove_maintenance_service': ''})

    ###############################
    # Verify incorrect layout value
    log_entry = 'Layout is not a list and will be ignored'
    Configuration.set(key=config_key, value=unknown_node_name)  # Value should be a list
    # Checkup maintenance agents will not find any suitable ALBA Nodes to deploy a maintenance agent on, because no ALBA Nodes are linked to the ALBA Backend yet,
    # therefore it'll deploy a maintenance on a random ALBA Node
    AlbaController.checkup_maintenance_agents()
    self.assertIn(member=log_entry, container=Logger._logs['lib'].keys())
    self.assertEqual(first='WARNING', second=Logger._logs['lib'][log_entry])
    # Example of ManagerClientMockup.maintenance_agents
    #     {<AlbaNode (guid: c015cf06-8bd0-46c5-811d-41ac6f521a63, at: 0x7f77cb0af390)>: {'alba-maintenance_backend_1-J6PMBcEk1Ej42udp': ['node_1']}}
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents.keys()))  # Only 1 ALBA Node should have a maintenance agent running
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents.values()))  # Only 1 maintenance agent should have been deployed on that 1 ALBA Node
    alba_node_w_agent_1 = ManagerClientMockup.maintenance_agents.keys()[0]
    self.assertEqual(first=[alba_node_w_agent_1.node_id], second=ManagerClientMockup.maintenance_agents[alba_node_w_agent_1].values()[0])  # Read preference must be the Node ID of the Node on which the maintenance was deployed

    # 3 out of 4 ALBA Nodes do not have a maintenance agent yet
    alba_nodes_wo_agent = [an for an in alba_structure['alba_nodes'].itervalues() if an != alba_node_w_agent_1]
    self.assertEqual(first=3, second=len(alba_nodes_wo_agent))

    # Link an ALBA Node without agent to the ALBA Backend, forcing the previously deployed service to be removed and a new 1 to be created on this ALBA Node
    alba_node = alba_nodes_wo_agent[0]
    VirtualAlbaBackend.data['127.0.0.1:35001'] = alba_backend.guid
    ManagerClientMockup.test_results[alba_node]['get_stack'] = {alba_node.node_id: {'osds': {'osd_id_1': {'ips': ['127.0.0.1'], 'port': 35001}}}}
    alba_node.invalidate_dynamics('stack')
    AlbaController.checkup_maintenance_agents()
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents.keys()))  # Only 1 ALBA Node should have a maintenance agent running
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents.values()))  # Only 1 maintenance agent should have been deployed on that 1 ALBA Node
    self.assertNotEqual(first=alba_node_w_agent_1, second=alba_node)  # The maintenance agent should have moved to the node linked to the ALBA Backend
    self.assertEqual(first=[alba_node.node_id], second=ManagerClientMockup.maintenance_agents[alba_node].values()[0])  # Read preference must be the Node ID of the Node on which the maintenance was moved to

    # Re-scheduling a checkup should not change anything at this point
    AlbaController.checkup_maintenance_agents()
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents.keys()))  # Only 1 ALBA Node should have a maintenance agent running
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents.values()))  # Only 1 maintenance agent should have been deployed on that 1 ALBA Node
    alba_node_w_agent_2 = ManagerClientMockup.maintenance_agents.keys()[0]
    self.assertEqual(first=alba_node, second=alba_node_w_agent_2)  # The maintenance agent should not have moved

    # Set 2 out of 4 ALBA Nodes in the layout key
    alba_nodes_wo_agent = [an for an in alba_structure['alba_nodes'].itervalues() if an != alba_node_w_agent_2]
    self.assertEqual(first=3, second=len(alba_nodes_wo_agent))
    node_1 = alba_nodes_wo_agent[0]
    node_2 = alba_nodes_wo_agent[1]
    Configuration.set(key=config_key, value=[node_1.node_id, node_2.node_id])
    AlbaController.checkup_maintenance_agents()
    self.assertIn(member=node_1, container=ManagerClientMockup.maintenance_agents)  # Specified in the layout
    self.assertIn(member=node_2, container=ManagerClientMockup.maintenance_agents)  # Specified in the layout
    self.assertEqual(first=2, second=len(ManagerClientMockup.maintenance_agents))  # Only the 2 specified ALBA Nodes should be running a maintenance agent
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents[node_1]))  # 1 Maintenance agent for this ALBA Node
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents[node_2]))  # 1 Maintenance agent for this ALBA Node
    self.assertEqual(first=[node_1.node_id], second=ManagerClientMockup.maintenance_agents[node_1].values()[0])  # Validate the read preference
    self.assertEqual(first=[node_2.node_id], second=ManagerClientMockup.maintenance_agents[node_2].values()[0])  # Validate the read preference

    #########################################
    # Verify all ALBA Nodes unknown in layout
    Logger._logs['lib'] = {}
    log_entry = 'Layout does not contain any known/reachable nodes and will be ignored'
    Configuration.set(key=config_key, value=[unknown_node_name])  # Only unknown Nodes in layout
    AlbaController.checkup_maintenance_agents()
    self.assertIn(member=log_entry, container=Logger._logs['lib'].keys())
    self.assertEqual(first='WARNING', second=Logger._logs['lib'][log_entry])
    self.assertIn(member=alba_node, container=ManagerClientMockup.maintenance_agents)  # The ALBA Node linked to the ALBA Backend should again have the maintenance agent
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents[alba_node]))  # Only 1 maintenance agent should have been deployed
    self.assertEqual(first=[alba_node.node_id], second=ManagerClientMockup.maintenance_agents[alba_node].values()[0])  # Read preference must be the Node ID of the Node on which the maintenance was deployed

    # 3 out of 4 ALBA Nodes do not have a maintenance agent yet
    alba_nodes_wo_agent = [an for an in alba_structure['alba_nodes'].itervalues() if an != alba_node_w_agent_1]
    self.assertEqual(first=3, second=len(alba_nodes_wo_agent))

    #############################################
    # Verify at least 1 known ALBA Node in layout
    Logger._logs['lib'] = {}
    node_3 = alba_structure['alba_nodes'][3]
    log_entry = 'Layout contains unknown/unreachable node {0}'.format(unknown_node_name)
    Configuration.set(key=config_key, value=[unknown_node_name, node_3.node_id])  # 1 known ALBA Node in layout
    AlbaController.checkup_maintenance_agents()
    self.assertIn(member=log_entry, container=Logger._logs['lib'].keys())
    self.assertEqual(first='WARNING', second=Logger._logs['lib'][log_entry])
    self.assertIn(member=node_3, container=ManagerClientMockup.maintenance_agents)  # The ALBA Node specified in the layout should have the maintenance agent
    self.assertEqual(first=1, second=len(ManagerClientMockup.maintenance_agents[node_3]))  # Only 1 maintenance agent should have been deployed
    self.assertEqual(first=[node_3.node_id], second=ManagerClientMockup.maintenance_agents[node_3].values()[0])  # Read preference must be the Node ID of the Node on which the maintenance was deployed

    # 3 out of 4 ALBA Nodes do not have a maintenance agent yet
    alba_nodes_wo_agent = [an for an in alba_structure['alba_nodes'].itervalues() if an != node_3]
    self.assertEqual(first=3, second=len(alba_nodes_wo_agent))

def test_maintenance_agents_for_local_backends_wo_layout(self):
    """
    Validates the checkup maintenance agents for LOCAL ALBA Backends without a specific layout specified
    Additionally test:
        * Checkup maintenance agents for a specific ALBA Backend
        * Downscale the required services
        * Upscale the required services
    """
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_nodes': [1, 2, 3, 4],
                                                                  'alba_backends': [[1, 'LOCAL']],
                                                                  'alba_abm_clusters': [1]})

    # Simulate every ALBA Node has 1 OSD for `alba_backend_1`
    local_ip = '127.0.0.1'
    alba_backend_1 = alba_structure['alba_backends'][1]
    for index, alba_node in enumerate(alba_structure['alba_nodes'].itervalues()):
        port = 35000 + index
        ManagerClientMockup.test_results[alba_node].update({'get_service_status': {'status': [None, 'active']},
                                                            'add_maintenance_service': '',
                                                            'remove_maintenance_service': '',
                                                            'get_stack': {alba_node.node_id: {'osds': {'osd_id_{0}'.format(index): {'ips': [local_ip], 'port': port}}}}})
        VirtualAlbaBackend.data['{0}:{1}'.format(local_ip, port)] = alba_backend_1.guid

    # Since all ALBA Nodes (4) are suitable for a maintenance agent, we only deploy a default amount of 3
    AlbaController.checkup_maintenance_agents()
    # Example of ManagerClientMockup.maintenance_agents
    #     {
    #         <AlbaNode (guid: d43df79f-9c47-4059-bd84-0f3ef81733c2, at: 0x7f80028e4750)>: {'alba-maintenance_backend_1-RWvL8aCzwIBk6FaZ': ['node_3']},
    #         <AlbaNode (guid: 5dbf972d-2619-48d1-adcd-86ec5b6342f7, at: 0x7f80027ee2d0)>: {'alba-maintenance_backend_1-OgvznajMoRagKCVb': ['node_2']},
    #         <AlbaNode (guid: 79a762f7-3019-4b86-80d6-a5560c52b208, at: 0x7f80027ee0d0)>: {'alba-maintenance_backend_1-ZV9v2vtRfvaYBhhw': ['node_4']}
    #     }
    self.assertEqual(first=3, second=len(ManagerClientMockup.maintenance_agents))  # 3 out of 4 ALBA Nodes should have a maintenance agent
    for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems():
        self.assertEqual(first=1, second=len(maintenance_info))
        self.assertEqual(first=[alba_node.node_id], second=ManagerClientMockup.maintenance_agents[alba_node].values()[0])

    # Downscale the required amount of services from 3 to 2
    config_key = AlbaController.NR_OF_AGENTS_CONFIG_KEY.format(alba_backend_1.guid)
    Configuration.set(key=config_key, value=2)
    nodes_w_agents = ManagerClientMockup.maintenance_agents.keys()
    AlbaController.checkup_maintenance_agents()
    self.assertEqual(first=2, second=len(ManagerClientMockup.maintenance_agents))  # 2 out of 4 ALBA Nodes should have a maintenance agent now
    for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems():
        self.assertEqual(first=1, second=len(maintenance_info))
        self.assertEqual(first=[alba_node.node_id], second=ManagerClientMockup.maintenance_agents[alba_node].values()[0])
    for alba_node in ManagerClientMockup.maintenance_agents:
        self.assertIn(member=alba_node, container=nodes_w_agents)  # 1 removed, rest should still be part of previously used ALBA Nodes

    # Upscale the required amount of services from 2 to 3 again
    Configuration.set(key=config_key, value=3)
    AlbaController.checkup_maintenance_agents()
    self.assertEqual(first=3, second=len(ManagerClientMockup.maintenance_agents))  # 3 out of 4 ALBA Nodes should again have a maintenance agent
    for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems():
        self.assertEqual(first=1, second=len(maintenance_info))
        self.assertEqual(first=[alba_node.node_id], second=ManagerClientMockup.maintenance_agents[alba_node].values()[0])

    # Create an additional ALBA Backend and verify that it is not processed when asking to checkup the previously created ALBA Backend
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[2, 'LOCAL']],
                                                                  'alba_abm_clusters': [2]},
                                                       previous_structure=alba_structure)
    alba_backend_2 = alba_structure['alba_backends'][2]
    AlbaController.checkup_maintenance_agents(alba_backend_guid=alba_backend_1.guid)  # Run checkup for previously created ALBA Backend, nothing should change
    self.assertEqual(first=3, second=len(ManagerClientMockup.maintenance_agents))  # 3 out of 4 ALBA Nodes should again have a maintenance agent
    for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems():
        self.assertEqual(first=1, second=len(maintenance_info))
        self.assertEqual(first=[alba_node.node_id], second=ManagerClientMockup.maintenance_agents[alba_node].values()[0])

    # Execute a general checkup maintenance agents and verify newly created ALBA Backend has 1 service (because not linked to any OSDs)
    AlbaController.checkup_maintenance_agents()
    services = []
    for alba_node, maintenance_info in ManagerClientMockup.maintenance_agents.iteritems():
        alba_node.invalidate_dynamics('maintenance_services')
        if alba_backend_2.name in alba_node.maintenance_services:
            services.append(maintenance_info)
            self.assertEqual(first=1, second=len(maintenance_info))
            self.assertEqual(first=[alba_node.node_id], second=maintenance_info.values()[0])
    self.assertEqual(first=1, second=len(services))  # Only 1 service should have been deployed for the 2nd ALBA Backend

def remove_osd(node_guid, osd_id, expected_safety):
    """
    Removes an OSD
    :param node_guid: Guid of the node to remove an OSD from
    :type node_guid: str
    :param osd_id: ID of the OSD to remove
    :type osd_id: str
    :param expected_safety: Expected safety after having removed the OSD
    :type expected_safety: dict or None
    :return: The slot ID from which the OSD was removed, wrapped in a list
    :rtype: list
    """
    # Retrieve corresponding OSD in model
    node = AlbaNode(node_guid)
    AlbaNodeController._logger.debug('Removing OSD {0} at node {1}'.format(osd_id, node.ip))
    osd = AlbaOSDList.get_by_osd_id(osd_id)
    alba_backend = osd.alba_backend

    if expected_safety is None:
        AlbaNodeController._logger.warning('Skipping safety check for OSD {0} on backend {1} - this is dangerous'.format(osd_id, alba_backend.guid))
    else:
        final_safety = AlbaController.calculate_safety(alba_backend_guid=alba_backend.guid, removal_osd_ids=[osd_id])
        safety_lost = final_safety['lost']
        safety_crit = final_safety['critical']
        if (safety_crit != 0 or safety_lost != 0) and (safety_crit != expected_safety['critical'] or safety_lost != expected_safety['lost']):
            raise RuntimeError('Cannot remove OSD {0} as the current safety is not as expected ({1} vs {2})'.format(osd_id, final_safety, expected_safety))
        AlbaNodeController._logger.debug('Safety OK for OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
    AlbaNodeController._logger.debug('Purging OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
    AlbaController.remove_units(alba_backend_guid=alba_backend.guid, osd_ids=[osd_id])

    # Delete the OSD
    result = node.client.delete_osd(slot_id=osd.slot_id, osd_id=osd_id)
    if result['_success'] is False:
        raise RuntimeError('Error removing OSD: {0}'.format(result['_error']))

    # Clean configuration management and model - Well, just try it at least
    if Configuration.exists(ASD_CONFIG.format(osd_id), raw=True):
        Configuration.delete(ASD_CONFIG_DIR.format(osd_id), raw=True)

    osd.delete()
    node.invalidate_dynamics()
    if alba_backend is not None:
        alba_backend.invalidate_dynamics()
        alba_backend.backend.invalidate_dynamics()
    if node.storagerouter is not None:
        try:
            DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)
        except UnableToConnectException:
            AlbaNodeController._logger.warning('Skipping disk sync since StorageRouter {0} is offline'.format(node.storagerouter.name))

    return [osd.slot_id]

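# Usage sketch for remove_osd (illustrative; the guid and OSD ID values are placeholders):
#     safety = AlbaController.calculate_safety(alba_backend_guid=backend_guid, removal_osd_ids=['my_osd_id'])
#     remove_osd(node_guid=node_guid, osd_id='my_osd_id', expected_safety=safety)
# Passing expected_safety=None skips the safety comparison, which the code itself flags as dangerous.
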
def test_alba_arakoon_checkup(self):
    """
    Validates whether the ALBA Arakoon checkup works (Manual and Scheduled)
    """
    ovs_structure = DalHelper.build_dal_structure(structure={'storagerouters': [1]})
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})

    #############################
    # SCHEDULED_ARAKOON_CHECKUP #
    #############################
    # Create an ABM and NSM cluster for ALBA Backend 1 and do some basic validations
    sr_1 = ovs_structure['storagerouters'][1]
    ab_1 = alba_structure['alba_backends'][1]
    MockedSSHClient._run_returns[sr_1.ip] = {}
    MockedSSHClient._run_returns[sr_1.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
    MockedSSHClient._run_returns[sr_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
    AlbaController.add_cluster(ab_1.guid)

    abm_cluster_name = '{0}-abm'.format(ab_1.name)
    nsm_cluster_name = '{0}-nsm_0'.format(ab_1.name)
    arakoon_clusters = sorted(Configuration.list('/ovs/arakoon'))
    self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name], list2=arakoon_clusters)
    abm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=abm_cluster_name)
    nsm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=nsm_cluster_name)
    self.assertTrue(expr=abm_metadata['in_use'])
    self.assertTrue(expr=nsm_metadata['in_use'])

    # Run scheduled Arakoon checkup and validate amount of Arakoon clusters did not change
    AlbaArakoonController.scheduled_alba_arakoon_checkup()
    self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name], list2=arakoon_clusters)
    self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
    self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
    self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services), second=1)

    # Create 2 additional StorageRouters
    srs = DalHelper.build_dal_structure(structure={'storagerouters': [2, 3]}, previous_structure=ovs_structure)['storagerouters']
    sr_2 = srs[2]
    sr_3 = srs[3]

    # Run scheduled checkup again and do some validations
    MockedSSHClient._run_returns[sr_2.ip] = {}
    MockedSSHClient._run_returns[sr_3.ip] = {}
    MockedSSHClient._run_returns[sr_2.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
    MockedSSHClient._run_returns[sr_3.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_3/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
    MockedSSHClient._run_returns[sr_2.ip]['arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'.format(sr_2.machine_id)] = None
    MockedSSHClient._run_returns[sr_3.ip]['arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'.format(sr_3.machine_id)] = None
    AlbaArakoonController.scheduled_alba_arakoon_checkup()
    self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name], list2=arakoon_clusters)
    self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=3)  # Gone up from 1 to 3
    self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
    self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services), second=1)  # Still 1 since NSM checkup hasn't ran yet

    # Make sure 1 StorageRouter is unreachable
    SSHClient._raise_exceptions[sr_3.ip] = {'users': ['ovs'],
                                            'exception': UnableToConnectException('No route to host')}
    AlbaArakoonController.scheduled_alba_arakoon_checkup()
    alba_logs = Logger._logs.get('lib', [])
    self.assertIn(member='Storage Router with IP {0} is not reachable'.format(sr_3.ip), container=alba_logs)

    ##########################
    # MANUAL_ARAKOON_CHECKUP #
    ##########################
    AlbaDalHelper.setup()  # Clear everything
    ovs_structure = DalHelper.build_dal_structure(structure={'storagerouters': [1]})
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})
    sr_1 = ovs_structure['storagerouters'][1]
    ab_1 = alba_structure['alba_backends'][1]
    MockedSSHClient._run_returns[sr_1.ip] = {}
    MockedSSHClient._run_returns[sr_1.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
    MockedSSHClient._run_returns[sr_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
    AlbaController.add_cluster(ab_1.guid)

    # Run manual Arakoon checkup and validate amount of Arakoon clusters did not change
    AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend_guid=ab_1.guid, nsm_clusters=[], abm_cluster=None)
    self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name], list2=arakoon_clusters)
    self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
    self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
    self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services), second=1)

    # Test some error paths
    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend_guid=ab_1.guid, nsm_clusters=['no_abm_cluster_passed'])
    self.assertEqual(first=raise_info.exception.message, second='Both ABM cluster and NSM clusters must be provided')

    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend_guid=ab_1.guid, nsm_clusters=[], abm_cluster='no_nsm_clusters_passed')
    self.assertEqual(first=raise_info.exception.message, second='Both ABM cluster and NSM clusters must be provided')

    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend_guid=ab_1.guid, nsm_clusters=[nsm_cluster_name], abm_cluster=abm_cluster_name)
    self.assertEqual(first=raise_info.exception.message, second='Cluster {0} has already been claimed'.format(abm_cluster_name))

    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend_guid=ab_1.guid, nsm_clusters=['non-existing-nsm-cluster'], abm_cluster='non-existing-abm-cluster')
    self.assertEqual(first=raise_info.exception.message, second='Could not find an Arakoon cluster with name: non-existing-abm-cluster')

    # Recreate ALBA Backend with Arakoon clusters
    AlbaDalHelper.setup()  # Clear everything
    ovs_structure = DalHelper.build_dal_structure(structure={'storagerouters': [1]})
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})
    sr_1 = ovs_structure['storagerouters'][1]
    ab_1 = alba_structure['alba_backends'][1]

    # Create some Arakoon clusters to be claimed by the manual checkup
    for cluster_name, cluster_type in {'manual-abm-1': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                                       'manual-abm-2': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                                       'manual-nsm-1': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                                       'manual-nsm-2': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                                       'manual-nsm-3': ServiceType.ARAKOON_CLUSTER_TYPES.NSM}.iteritems():
        arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
        arakoon_installer.create_cluster(cluster_type=cluster_type,
                                         ip=sr_1.ip,
                                         base_dir=DalHelper.CLUSTER_DIR.format(cluster_name),
                                         internal=False)
        arakoon_installer.start_cluster()
        arakoon_installer.unclaim_cluster()
    AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend_guid=ab_1.guid, nsm_clusters=['manual-nsm-1', 'manual-nsm-3'], abm_cluster='manual-abm-2')

    # Validate the correct clusters have been claimed by the manual checkup
    unused_abms = ArakoonInstaller.get_unused_arakoon_clusters(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.ABM)
    unused_nsms = ArakoonInstaller.get_unused_arakoon_clusters(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM)
    self.assertEqual(first=len(unused_abms), second=1)
    self.assertEqual(first=len(unused_nsms), second=1)
    self.assertEqual(first=unused_abms[0]['cluster_name'], second='manual-abm-1')
    self.assertEqual(first=unused_nsms[0]['cluster_name'], second='manual-nsm-2')

def test_nsm_checkup_internal(self):
    """
    Validates whether the NSM checkup works for internally managed Arakoon clusters
    """
    Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 1)
    Configuration.set('/ovs/framework/plugins/alba/config|nsm.maxload', 10)

    structure = DalHelper.build_dal_structure(structure={'storagerouters': [1]})
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})
    alba_backend = alba_structure['alba_backends'][1]
    storagerouter_1 = structure['storagerouters'][1]
    MockedSSHClient._run_returns[storagerouter_1.ip] = {}
    MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
    MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
    VirtualAlbaBackend.run_log = {}
    AlbaController.add_cluster(alba_backend.guid)

    # Validation of nsm_checkup
    with self.assertRaises(ValueError):
        AlbaArakoonController.nsm_checkup(min_internal_nsms=0)  # Min_nsms should be at least 1

    # Validate single node NSM cluster
    self._validate_nsm([['1']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config'],
                                                                       ['add_nsm_host', 'backend_1-nsm_0'],
                                                                       ['update_maintenance_config', '--eviction-type-random'],
                                                                       ['update_maintenance_config', 'enable-auto-cleanup-deleted-namespaces-days']])

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()

    # Running the NSM checkup should not change anything after an add_cluster
    self._validate_nsm([['1']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

    structure = DalHelper.build_dal_structure(structure={'storagerouters': [2]}, previous_structure=structure)

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()

    # There should still be one NSM, since the safety is still at 1
    self._validate_nsm([['1']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

    Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 2)

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()

    # There should still be one NSM, since the ABM isn't extended yet
    self._validate_nsm([['1']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

    storagerouter_2 = structure['storagerouters'][2]
    MockedSSHClient._run_returns[storagerouter_2.ip] = {}
    MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-abm/db'] = None

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.manual_alba_arakoon_checkup(alba_backend.guid, nsm_clusters=[])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config']])

    MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_0/config -catchup-only'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()

    # Now that the ABM was extended, the NSM should also be extended
    self._validate_nsm([['1', '2']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_nsm_host', 'backend_1-nsm_0']])

    MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
    MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_1/db'] = None
    MockedSSHClient._run_returns[storagerouter_1.ip]['arakoon --node 1 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_1/config -catchup-only'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_1/config -catchup-only'] = None

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup(min_internal_nsms=2)

    # A second NSM cluster (running on two nodes) should be added
    self._validate_nsm([['1', '2'], ['1', '2']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['add_nsm_host', 'backend_1-nsm_1']])

    VirtualAlbaBackend.data['backend_1-abm']['nsms'][0]['namespaces_count'] = 25

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()

    # Nothing should have happened, since there's still a non-overloaded NSM
    self._validate_nsm([['1', '2'], ['1', '2']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

    VirtualAlbaBackend.data['backend_1-abm']['nsms'][1]['namespaces_count'] = 25

    MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
    MockedSSHClient._run_returns[storagerouter_1.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-nsm_2/db'] = None
    MockedSSHClient._run_returns[storagerouter_1.ip]['arakoon --node 1 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_2/config -catchup-only'] = None
    MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-nsm_2/config -catchup-only'] = None

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()

    # A third NSM cluster (running on two nodes) should be added
    self._validate_nsm([['1', '2'], ['1', '2'], ['1', '2']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['add_nsm_host', 'backend_1-nsm_2']])

    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()

    # Running the checkup should not change anything
    self._validate_nsm([['1', '2'], ['1', '2'], ['1', '2']])
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [])

def check_nsm_load(cls, result_handler, max_load=None, use_total_capacity=False, total_capacity_warning=None, total_capacity_error=None):
    """
    Checks all NSM services registered within the Framework and reports their load
    :param result_handler: logging object
    :type result_handler: ovs.extensions.healthcheck.result.HCResults
    :param max_load: Maximum load percentage before marking it as overloaded. Defaults to ovs/framework/plugins/alba/config|nsm.maxload
    :type max_load: float
    :param use_total_capacity: Base the NSM load on the total possible capacity (capacity of the NSMs before they are marked as overloaded)
                               instead of checking the least filled NSM. Use the threshold arguments for tuning
    :type use_total_capacity: bool
    :param total_capacity_warning: Number of remaining namespaces threshold before throwing a warning. Defaults to 20% of the total namespaces
    :type total_capacity_warning: int
    :param total_capacity_error: Number of remaining namespaces threshold before throwing an error. Defaults to 5% of the total namespaces
    :type total_capacity_error: int
    :return: None
    :rtype: NoneType
    """
    max_nsm_load_config = Configuration.get('ovs/framework/plugins/alba/config|nsm.maxload')
    max_load = max_load or max_nsm_load_config
    for alba_backend in AlbaBackendList.get_albabackends():
        if alba_backend.abm_cluster is None:
            result_handler.failure('No ABM cluster found for ALBA Backend {0}'.format(alba_backend.name))
            continue
        if len(alba_backend.abm_cluster.abm_services) == 0:
            result_handler.failure('ALBA Backend {0} does not have any registered ABM services'.format(alba_backend.name))
            continue
        if len(alba_backend.nsm_clusters) == 0:
            result_handler.failure('ALBA Backend {0} does not have any registered NSM services'.format(alba_backend.name))
            continue
        internal = alba_backend.abm_cluster.abm_services[0].service.is_internal
        if use_total_capacity:
            maximum_capacity_before_overload = AlbaHealthCheck._get_nsm_max_capacity_before_overload(alba_backend, max_nsm_load_config)
            total_capacity_warning = total_capacity_warning or math.ceil(maximum_capacity_before_overload * 1.0 / 5)
            total_capacity_error = total_capacity_error or math.ceil(maximum_capacity_before_overload * 1.0 / 20)

            config = Configuration.get_configuration_path(key=alba_backend.abm_cluster.config_location)
            hosts_data = AlbaCLI.run(command='list-nsm-hosts', config=config)
            current_capacity = sum([host['namespaces_count'] for host in hosts_data if not host['lost']])
            remaining_capacity = maximum_capacity_before_overload - current_capacity
            if remaining_capacity > total_capacity_warning and remaining_capacity > total_capacity_error:  # Only error could be specified
                result_handler.success('NSMs for backend {0} have enough capacity remaining ({1}/{2} used)'.format(alba_backend.name, current_capacity, maximum_capacity_before_overload),
                                       code=ErrorCodes.nsm_load_ok)
            elif total_capacity_warning >= remaining_capacity > total_capacity_error:
                result_handler.warning('NSMs for backend {0} have reached the warning threshold '
                                       '({1} namespaces had to be remaining, {2}/{3} used)'.format(alba_backend.name, total_capacity_warning, current_capacity, maximum_capacity_before_overload),
                                       code=ErrorCodes.nsm_load_ok)
            else:
                result_handler.failure('NSMs for backend {0} have reached the error threshold '
                                       '({1} namespaces had to be remaining, {2}/{3} used)'.format(alba_backend.name, total_capacity_error, current_capacity, maximum_capacity_before_overload),
                                       code=ErrorCodes.nsm_load_ok)
        else:
            nsm_loads = {}
            sorted_nsm_clusters = sorted(alba_backend.nsm_clusters, key=lambda k: k.number)
            for nsm_cluster in sorted_nsm_clusters:
                nsm_loads[nsm_cluster.number] = AlbaController.get_load(nsm_cluster)
            overloaded = min(nsm_loads.values()) >= max_load
            if overloaded is False:
                result_handler.success('NSMs for backend {0} are not overloaded'.format(alba_backend.name),
                                       code=ErrorCodes.nsm_load_ok)
            else:
                if internal is True:
                    result_handler.warning('NSMs for backend {0} are overloaded. The NSM checkup will take care of this'.format(alba_backend.name),
                                           code=ErrorCodes.nsm_load_warn)
                else:
                    result_handler.failure('NSMs for backend {0} are overloaded. Please add your own NSM clusters to the backend'.format(alba_backend.name),
                                           code=ErrorCodes.nsm_load_failure)

def checkup_maintenance_agents():
    """
    Perform a maintenance agent checkup
    :return: None
    """
    AlbaController.checkup_maintenance_agents()

def migrate(): """ Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain: * "dangerous" migration code (it needs certain running services) * Migration code depending on a cluster-wide state * ... """ AlbaMigrationController._logger.info( 'Preparing out of band migrations...') from ovs.dal.hybrids.diskpartition import DiskPartition from ovs.dal.lists.albabackendlist import AlbaBackendList from ovs.dal.lists.albanodelist import AlbaNodeList from ovs.dal.lists.albaosdlist import AlbaOSDList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException from ovs.extensions.migration.migration.albamigrator import ExtensionMigrator from ovs.extensions.packages.albapackagefactory import PackageFactory from ovs.extensions.services.albaservicefactory import ServiceFactory from ovs.extensions.plugins.albacli import AlbaCLI, AlbaError from ovs.lib.alba import AlbaController from ovs.lib.disk import DiskController AlbaMigrationController._logger.info('Start out of band migrations...') ############################################# # Introduction of IP:port combination on OSDs osd_info_map = {} alba_backends = AlbaBackendList.get_albabackends() for alba_backend in alba_backends: AlbaMigrationController._logger.info( 'Verifying ALBA Backend {0}'.format(alba_backend.name)) if alba_backend.abm_cluster is None: AlbaMigrationController._logger.warning( 'ALBA Backend {0} does not have an ABM cluster registered'. format(alba_backend.name)) continue AlbaMigrationController._logger.debug( 'Retrieving configuration path for ALBA Backend {0}'.format( alba_backend.name)) try: config = Configuration.get_configuration_path( alba_backend.abm_cluster.config_location) except: AlbaMigrationController._logger.exception( 'Failed to retrieve the configuration path for ALBA Backend {0}' .format(alba_backend.name)) continue AlbaMigrationController._logger.info( 'Retrieving OSD information for ALBA Backend {0}'.format( alba_backend.name)) try: osd_info = AlbaCLI.run(command='list-all-osds', config=config) except (AlbaError, RuntimeError): AlbaMigrationController._logger.exception( 'Failed to retrieve OSD information for ALBA Backend {0}'. format(alba_backend.name)) continue for osd_info in osd_info: if osd_info.get('long_id'): osd_info_map[osd_info['long_id']] = { 'ips': osd_info.get('ips', []), 'port': osd_info.get('port') } for osd in AlbaOSDList.get_albaosds(): if osd.osd_id not in osd_info_map: AlbaMigrationController._logger.warning( 'OSD with ID {0} is modelled but could not be found through ALBA' .format(osd.osd_id)) continue ips = osd_info_map[osd.osd_id]['ips'] port = osd_info_map[osd.osd_id]['port'] changes = False if osd.ips is None: changes = True osd.ips = ips if osd.port is None: changes = True osd.port = port if changes is True: AlbaMigrationController._logger.info( 'Updating OSD with ID {0} with IPS {1} and port {2}'. 
format(osd.osd_id, ips, port)) osd.save() ################################################### # Read preference for GLOBAL ALBA Backends (1.10.3) (https://github.com/openvstorage/framework-alba-plugin/issues/452) if Configuration.get(key='/ovs/framework/migration|read_preference', default=False) is False: try: name_backend_map = dict((alba_backend.name, alba_backend) for alba_backend in alba_backends) for alba_node in AlbaNodeList.get_albanodes(): AlbaMigrationController._logger.info( 'Processing maintenance services running on ALBA Node {0} with ID {1}' .format(alba_node.ip, alba_node.node_id)) alba_node.invalidate_dynamics('maintenance_services') for alba_backend_name, services in alba_node.maintenance_services.iteritems( ): if alba_backend_name not in name_backend_map: AlbaMigrationController._logger.error( 'ALBA Node {0} has services for an ALBA Backend {1} which is not modelled' .format(alba_node.ip, alba_backend_name)) continue alba_backend = name_backend_map[alba_backend_name] AlbaMigrationController._logger.info( 'Processing {0} ALBA Backend {1} with GUID {2}'. format(alba_backend.scaling, alba_backend.name, alba_backend.guid)) if alba_backend.scaling == alba_backend.SCALINGS.LOCAL: read_preferences = [alba_node.node_id] else: read_preferences = AlbaController.get_read_preferences_for_global_backend( alba_backend=alba_backend, alba_node_id=alba_node.node_id, read_preferences=[]) for service_name, _ in services: AlbaMigrationController._logger.info( 'Processing service {0}'.format(service_name)) old_config_key = '/ovs/alba/backends/{0}/maintenance/config'.format( alba_backend.guid) new_config_key = '/ovs/alba/backends/{0}/maintenance/{1}/config'.format( alba_backend.guid, service_name) if Configuration.exists(key=old_config_key): new_config = Configuration.get( key=old_config_key) new_config[ 'read_preference'] = read_preferences Configuration.set(key=new_config_key, value=new_config) for alba_backend in alba_backends: Configuration.delete( key='/ovs/alba/backends/{0}/maintenance/config'.format( alba_backend.guid)) AlbaController.checkup_maintenance_agents.delay() Configuration.set( key='/ovs/framework/migration|read_preference', value=True) except Exception: AlbaMigrationController._logger.exception( 'Updating read preferences for ALBA Backends failed') ####################################################### # Storing actual package name in version files (1.11.0) (https://github.com/openvstorage/framework/issues/1876) changed_clients = set() storagerouters = StorageRouterList.get_storagerouters() if Configuration.get( key= '/ovs/framework/migration|actual_package_name_in_version_file_alba', default=False) is False: try: service_manager = ServiceFactory.get_manager() alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for( component=PackageFactory.COMP_ALBA) for storagerouter in storagerouters: try: root_client = SSHClient( endpoint=storagerouter.ip, username='******' ) # Use '.ip' instead of StorageRouter object because this code is executed during post-update at which point the heartbeat has not been updated for some time except UnableToConnectException: AlbaMigrationController._logger.exception( 'Updating actual package name for version files failed on StorageRouter {0}' .format(storagerouter.ip)) continue for file_name in root_client.file_list( directory=ServiceFactory.RUN_FILE_DIR): if not file_name.endswith('.version'): continue file_path = '{0}/{1}'.format( ServiceFactory.RUN_FILE_DIR, file_name) contents = root_client.file_read(filename=file_path) if 
alba_pkg_name == PackageFactory.PKG_ALBA_EE and '{0}='.format( PackageFactory.PKG_ALBA) in contents: # Rewrite the version file in the RUN_FILE_DIR contents = contents.replace( PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE) root_client.file_write(filename=file_path, contents=contents) # Regenerate the service and update the EXTRA_VERSION_CMD in the configuration management service_name = file_name.split('.')[0] service_config_key = ServiceFactory.SERVICE_CONFIG_KEY.format( storagerouter.machine_id, service_name) if Configuration.exists(key=service_config_key): service_config = Configuration.get( key=service_config_key) if 'EXTRA_VERSION_CMD' in service_config: service_config[ 'EXTRA_VERSION_CMD'] = '{0}=`{1}`'.format( alba_pkg_name, alba_version_cmd) Configuration.set(key=service_config_key, value=service_config) service_manager.regenerate_service( name='ovs-arakoon', client=root_client, target_name='ovs-{0}'.format( service_name) ) # Leave out .version changed_clients.add(root_client) Configuration.set( key= '/ovs/framework/migration|actual_package_name_in_version_file_alba', value=True) except Exception: AlbaMigrationController._logger.exception( 'Updating actual package name for version files failed') for root_client in changed_clients: try: root_client.run(['systemctl', 'daemon-reload']) except Exception: AlbaMigrationController._logger.exception( 'Executing command "systemctl daemon-reload" failed') #################################### # Fix for migration version (1.11.0) # Previous code could potentially store a higher version number in the config management than the actual version number if Configuration.get( key='/ovs/framework/migration|alba_migration_version_fix', default=False) is False: try: for storagerouter in storagerouters: config_key = '/ovs/framework/hosts/{0}/versions'.format( storagerouter.machine_id) if Configuration.exists(key=config_key): versions = Configuration.get(key=config_key) if versions.get(PackageFactory.COMP_MIGRATION_ALBA, 0) > ExtensionMigrator.THIS_VERSION: versions[ PackageFactory. 
COMP_MIGRATION_ALBA] = ExtensionMigrator.THIS_VERSION
                    Configuration.set(key=config_key, value=versions)
        Configuration.set(key='/ovs/framework/migration|alba_migration_version_fix', value=True)
    except Exception:
        AlbaMigrationController._logger.exception('Updating migration version failed')

####################################
# Enable auto-cleanup
migration_auto_cleanup_key = '/ovs/framework/migration|alba_auto_cleanup'
if Configuration.get(key=migration_auto_cleanup_key, default=False) is False:
    try:
        for storagerouter in StorageRouterList.get_storagerouters():
            storagerouter.invalidate_dynamics('features')  # New feature was added
        errors = []
        for alba_backend in AlbaBackendList.get_albabackends():
            try:
                AlbaController.set_auto_cleanup(alba_backend.guid)
            except Exception as ex:
                AlbaMigrationController._logger.exception('Failed to set the auto-cleanup for ALBA Backend {0}'.format(alba_backend.name))
                errors.append(ex)
        if len(errors) == 0:
            Configuration.set(key=migration_auto_cleanup_key, value=True)
    except Exception:
        AlbaMigrationController._logger.exception('Updating auto cleanup failed')

####################################
# Change cache eviction
migration_random_eviction_key = '/ovs/framework/migration|alba_cache_eviction_random'
if Configuration.get(key=migration_random_eviction_key, default=False) is False:
    try:
        errors = []
        for alba_backend in AlbaBackendList.get_albabackends():
            try:
                AlbaController.set_cache_eviction(alba_backend.guid)
            except Exception as ex:
                AlbaMigrationController._logger.exception('Failed to set the cache eviction for ALBA Backend {0}'.format(alba_backend.name))
                errors.append(ex)
        if len(errors) == 0:
            Configuration.set(key=migration_random_eviction_key, value=True)
    except Exception:
        AlbaMigrationController._logger.exception('Updating cache eviction failed')

###################################################
# Sync all disks and apply the backend role. Backend role was removed with the AD (since 1.10)
albanode_backend_role_sync_key = '/ovs/framework/migration|albanode_backend_role_sync'
if not Configuration.get(key=albanode_backend_role_sync_key, default=False):
    try:
        errors = []
        for alba_node in AlbaNodeList.get_albanodes():
            try:
                if not alba_node.storagerouter:
                    continue
                stack = alba_node.client.get_stack()  # type: dict
                for slot_id, slot_information in stack.iteritems():
                    osds = slot_information.get('osds', {})  # type: dict
                    slot_aliases = slot_information.get('aliases', [])  # type: list
                    if not osds:  # No osds means no partition was made
                        continue
                    # Sync to add all potential partitions that will need a backend role
                    DiskController.sync_with_reality(storagerouter_guid=alba_node.storagerouter_guid)
                    for disk in alba_node.storagerouter.disks:
                        if set(disk.aliases).intersection(set(slot_aliases)):
                            partition = disk.partitions[0]
                            if DiskPartition.ROLES.BACKEND not in partition.roles:
                                partition.roles.append(DiskPartition.ROLES.BACKEND)
                                partition.save()
            except Exception as ex:
                AlbaMigrationController._logger.exception('Syncing for storagerouter/albanode {0} failed'.format(alba_node.storagerouter.ip))
                errors.append(ex)
        if not errors:
            Configuration.set(key=albanode_backend_role_sync_key, value=True)
    except Exception:
        AlbaMigrationController._logger.exception('Syncing up the disks for backend roles failed')

AlbaMigrationController._logger.info('Finished out of band migrations')
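# Illustrative sketch (not part of the migration code above): every out-of-band migration in this
# function follows the same guard pattern - check a boolean flag under /ovs/framework/migration,
# perform the work, and only persist the flag once the work completed without errors. The helper
# name and the example flag below are hypothetical; the Configuration calls mirror the ones above.
def _run_migration_once(flag_key, migrate, logger):
    """Run 'migrate' at most once, persisting 'flag_key' only on success."""
    if Configuration.get(key=flag_key, default=False) is False:
        try:
            migrate()
            Configuration.set(key=flag_key, value=True)
        except Exception:
            logger.exception('Migration guarded by {0} failed'.format(flag_key))

# Example usage (hypothetical flag name):
# _run_migration_once('/ovs/framework/migration|example_flag', some_callable, AlbaMigrationController._logger)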
def add_preset(alba_backend_guid, name, compression, policies, encryption, fragment_size=None): """ Adds a preset to Alba :param alba_backend_guid: Guid of the ALBA backend :type alba_backend_guid: str :param name: Name of the preset :type name: str :param compression: Compression type for the preset (none | snappy | bz2) :type compression: str :param policies: Policies for the preset :type policies: list :param encryption: Encryption for the preset (none | aes-cbc-256 | aes-ctr-256) :type encryption: str :param fragment_size: Size of a fragment in bytes (e.g. 1048576) :type fragment_size: int :return: None """ # VALIDATIONS if not re.match(Toolbox.regex_preset, name): raise ValueError('Invalid preset name specified') compression_options = ['snappy', 'bz2', 'none'] if compression not in compression_options: raise ValueError('Invalid compression format specified, please choose from: "{0}"'.format('", "'.join(compression_options))) encryption_options = ['aes-cbc-256', 'aes-ctr-256', 'none'] if encryption not in encryption_options: raise ValueError('Invalid encryption format specified, please choose from: "{0}"'.format('", "'.join(encryption_options))) if fragment_size is not None and (not isinstance(fragment_size, int) or not 16 <= fragment_size <= 1024 ** 3): raise ValueError('Fragment size should be a positive integer smaller than 1 GiB') AlbaPresetController._validate_policies_param(policies=policies) alba_backend = AlbaBackend(alba_backend_guid) if name in [preset['name'] for preset in alba_backend.presets]: raise RuntimeError('Preset with name {0} already exists'.format(name)) # ADD PRESET preset = {'compression': compression, 'object_checksum': {'default': ['crc-32c'], 'verify_upload': True, 'allowed': [['none'], ['sha-1'], ['crc-32c']]}, 'osds': ['all'], 'fragment_size': 16 * 1024 ** 2 if fragment_size is None else int(fragment_size), 'policies': policies, 'fragment_checksum': ['crc-32c'], 'fragment_encryption': ['none'], 'in_use': False, 'name': name} # Generate encryption key temp_key_file = None if encryption != 'none': encryption_key = ''.join(random.choice(chr(random.randint(32, 126))) for _ in range(32)) temp_key_file = tempfile.mktemp() with open(temp_key_file, 'wb') as temp_file: temp_file.write(encryption_key) temp_file.flush() preset['fragment_encryption'] = ['{0}'.format(encryption), '{0}'.format(temp_key_file)] # Dump preset content on filesystem config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend))) temp_config_file = tempfile.mktemp() with open(temp_config_file, 'wb') as data_file: data_file.write(json.dumps(preset)) data_file.flush() # Create preset AlbaPresetController._logger.debug('Adding preset {0} with compression {1} and policies {2}'.format(name, compression, policies)) AlbaCLI.run(command='create-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name]) # Cleanup alba_backend.invalidate_dynamics() for filename in [temp_key_file, temp_config_file]: if filename and os.path.exists(filename) and os.path.isfile(filename): os.remove(filename)
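# Illustrative usage sketch for add_preset; the GUID and the policy values below are made up.
# A policy is commonly expressed as a [k, m, c, x] list (data fragments, parity fragments and
# spread parameters - see the ALBA preset documentation for the exact semantics).
AlbaPresetController.add_preset(alba_backend_guid='11111111-2222-3333-4444-555555555555',  # hypothetical GUID
                                name='highly_available',
                                compression='snappy',
                                policies=[[2, 2, 3, 4]],
                                encryption='aes-cbc-256',
                                fragment_size=4 * 1024 ** 2)  # 4 MiB fragments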
def fill_slots(node_guid, osd_information, metadata=None): """ Creates 1 or more new OSDs :param node_guid: Guid of the node to which the disks belong :type node_guid: str :param osd_information: Information about the amount of OSDs to add to each Slot :type osd_information: list :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information) :type metadata: dict :return: None :rtype: NoneType """ metadata_type_validation = { 'integer': (int, None), 'osd_type': (str, AlbaOSD.OSD_TYPES.keys()), 'ip': (str, ExtensionsToolbox.regex_ip), 'port': (int, { 'min': 1, 'max': 65535 }) } node = AlbaNode(node_guid) required_params = {'slot_id': (str, None)} can_be_filled = False for flow in ['fill', 'fill_add']: if node.node_metadata[flow] is False: continue can_be_filled = True if flow == 'fill_add': required_params['alba_backend_guid'] = (str, None) for key, mtype in node.node_metadata['{0}_metadata'.format( flow)].iteritems(): if mtype in metadata_type_validation: required_params[key] = metadata_type_validation[mtype] if can_be_filled is False: raise ValueError('The given node does not support filling slots') validation_reasons = [] for osd_info in osd_information: # type: dict try: ExtensionsToolbox.verify_required_params( required_params=required_params, actual_params=osd_info) except RuntimeError as ex: validation_reasons.append(str(ex)) if len(validation_reasons) > 0: raise ValueError('Missing required parameter:\n *{0}'.format( '\n* '.join(validation_reasons))) for osd_info in osd_information: if node.node_metadata['fill'] is True: # Only filling is required AlbaNodeController._fill_slot( node, osd_info['slot_id'], dict((key, osd_info[key]) for key in node.node_metadata['fill_metadata'])) elif node.node_metadata['fill_add'] is True: # Fill the slot created_osds = AlbaNodeController._fill_slot( node, osd_info['slot_id'], dict((key, osd_info[key]) for key in node.node_metadata['fill_add_metadata'])) # And add/claim the OSD if node.type == AlbaNode.NODE_TYPES.S3: # The S3 manager returns the information about the osd when filling it for created_osd_info in created_osds: osd_info.update( created_osd_info ) # Add additional information about the osd AlbaController.add_osds( alba_backend_guid=osd_info['alba_backend_guid'], osds=[osd_info], alba_node_guid=node_guid, metadata=metadata) else: AlbaController.add_osds( alba_backend_guid=osd_info['alba_backend_guid'], osds=[osd_info], alba_node_guid=node_guid, metadata=metadata) node.invalidate_dynamics('stack')
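# Illustrative payload sketch for fill_slots; the GUIDs are made up and the 'count' / 'osd_type'
# keys are hypothetical examples - the actual required keys are dictated by
# node.node_metadata['fill_metadata'] or node.node_metadata['fill_add_metadata'].
osd_information = [{'slot_id': 'slot-0',
                    'alba_backend_guid': '66666666-7777-8888-9999-000000000000',  # only required for the 'fill_add' flow
                    'count': 1,          # hypothetical key of metadata type 'integer'
                    'osd_type': 'ASD'}]  # hypothetical key of metadata type 'osd_type'
AlbaNodeController.fill_slots(node_guid='aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',  # hypothetical AlbaNode GUID
                              osd_information=osd_information)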
def fill_slots(node_cluster_guid, node_guid, osd_information, metadata=None): # type: (str, str, List[Dict[str, Any]]) -> None """ Creates 1 or more new OSDs :param node_cluster_guid: Guid of the node cluster to which the disks belong :type node_cluster_guid: basestring :param node_guid: Guid of the AlbaNode to act as the 'active' side :type node_guid: basestring :param osd_information: Information about the amount of OSDs to add to each Slot :type osd_information: list :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information) :type metadata: dict :return: None :rtype: NoneType """ metadata_type_validation = { 'integer': (int, None), 'osd_type': (str, AlbaOSD.OSD_TYPES.keys()), 'ip': (str, ExtensionsToolbox.regex_ip), 'port': (int, { 'min': 1, 'max': 65535 }) } node_cluster = AlbaNodeCluster(node_cluster_guid) # Check for the active side if it's part of the cluster active_node = AlbaNode(node_guid) if active_node not in node_cluster.alba_nodes: raise ValueError( 'The requested active AlbaNode is not part of AlbaNodeCluster {0}' .format(node_cluster.guid)) required_params = {'slot_id': (str, None)} can_be_filled = False for flow in ['fill', 'fill_add']: if node_cluster.cluster_metadata[flow] is False: continue can_be_filled = True if flow == 'fill_add': required_params['alba_backend_guid'] = (str, None) for key, mtype in node_cluster.cluster_metadata[ '{0}_metadata'.format(flow)].iteritems(): if mtype in metadata_type_validation: required_params[key] = metadata_type_validation[mtype] if can_be_filled is False: raise ValueError( 'The given node cluster does not support filling slots') validation_reasons = [] for slot_info in osd_information: try: ExtensionsToolbox.verify_required_params( required_params=required_params, actual_params=slot_info) except RuntimeError as ex: validation_reasons.append(str(ex)) if len(validation_reasons) > 0: raise ValueError('Missing required parameter:\n *{0}'.format( '\n* '.join(validation_reasons))) for slot_info in osd_information: if node_cluster.cluster_metadata['fill'] is True: # Only filling is required active_node.client.fill_slot( slot_id=slot_info['slot_id'], extra=dict((key, slot_info[key]) for key in node_cluster.cluster_metadata['fill_metadata'])) elif node_cluster.cluster_metadata['fill_add'] is True: # Fill the slot active_node.client.fill_slot( slot_id=slot_info['slot_id'], extra=dict( (key, slot_info[key]) for key in node_cluster.cluster_metadata['fill_add_metadata'])) # And add/claim the OSD AlbaController.add_osds( alba_backend_guid=slot_info['alba_backend_guid'], osds=[slot_info], alba_node_guid=node_guid, metadata=metadata) # Invalidate the stack and sync towards all passive sides active_node.invalidate_dynamics('stack') for node in node_cluster.alba_nodes: if node != active_node: try: node.client.sync_stack(active_node.stack) except: AlbaNodeClusterController._logger.exception( 'Error while syncing stacks to the passive side') node_cluster.invalidate_dynamics('stack')
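# Illustrative usage sketch for the Dual Controller variant; all GUIDs are made up.
# The slot is filled through the designated active node, after which the resulting stack
# is synced to the passive members of the AlbaNodeCluster.
AlbaNodeClusterController.fill_slots(node_cluster_guid='12121212-3434-5656-7878-909090909090',
                                     node_guid='aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                                     osd_information=[{'slot_id': 'slot-0',
                                                       'alba_backend_guid': '66666666-7777-8888-9999-000000000000'}])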
def remove_asd(node_guid, asd_id, expected_safety):
    """
    Removes an ASD
    :param node_guid: Guid of the node to remove an ASD from
    :type node_guid: str
    :param asd_id: ID of the ASD to remove
    :type asd_id: str
    :param expected_safety: Expected safety after having removed the ASD
    :type expected_safety: dict or None
    :return: Aliases of the disk on which the ASD was removed
    :rtype: list
    """
    node = AlbaNode(node_guid)
    AlbaNodeController._logger.debug('Removing ASD {0} at node {1}'.format(asd_id, node.ip))
    model_osd = None
    for disk in node.disks:
        for asd in disk.osds:
            if asd.osd_id == asd_id:
                model_osd = asd
                break
        if model_osd is not None:
            break
    if model_osd is not None:
        alba_backend = model_osd.alba_backend
    else:
        alba_backend = None

    asds = {}
    try:
        asds = node.client.get_asds()
    except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
        AlbaNodeController._logger.warning('Could not connect to node {0} to validate ASD'.format(node.guid))
    partition_alias = None
    for alias, asd_ids in asds.iteritems():
        if asd_id in asd_ids:
            partition_alias = alias
            break

    if alba_backend is not None:
        if expected_safety is None:
            AlbaNodeController._logger.warning('Skipping safety check for ASD {0} on backend {1} - this is dangerous'.format(asd_id, alba_backend.guid))
        else:
            final_safety = AlbaController.calculate_safety(alba_backend_guid=alba_backend.guid, removal_osd_ids=[asd_id])
            safety_lost = final_safety['lost']
            safety_crit = final_safety['critical']
            if (safety_crit != 0 or safety_lost != 0) and (safety_crit != expected_safety['critical'] or safety_lost != expected_safety['lost']):
                raise RuntimeError('Cannot remove ASD {0} as the current safety is not as expected ({1} vs {2})'.format(asd_id, final_safety, expected_safety))
            AlbaNodeController._logger.debug('Safety OK for ASD {0} on backend {1}'.format(asd_id, alba_backend.guid))
        AlbaNodeController._logger.debug('Purging ASD {0} on backend {1}'.format(asd_id, alba_backend.guid))
        AlbaController.remove_units(alba_backend_guid=alba_backend.guid, osd_ids=[asd_id])
    else:
        AlbaNodeController._logger.warning('Could not match ASD {0} to any backend. Cannot purge'.format(asd_id))

    disk_data = None
    if partition_alias is not None:
        AlbaNodeController._logger.debug('Removing ASD {0} from disk {1}'.format(asd_id, partition_alias))
        for device_info in node.client.get_disks().itervalues():
            if partition_alias in device_info['partition_aliases']:
                disk_data = device_info
                result = node.client.delete_asd(disk_id=device_info['aliases'][0].split('/')[-1], asd_id=asd_id)
                if result['_success'] is False:
                    raise RuntimeError('Error removing ASD: {0}'.format(result['_error']))
        if disk_data is None:  # The partition alias could not be matched to any disk reported by the node
            raise RuntimeError('Failed to find disk for partition with alias {0}'.format(partition_alias))
    else:
        AlbaNodeController._logger.warning('Could not remove ASD {0} from remote node (node down)'.format(asd_id))
    if Configuration.exists(AlbaNodeController.ASD_CONFIG.format(asd_id), raw=True):
        Configuration.delete(AlbaNodeController.ASD_CONFIG_DIR.format(asd_id), raw=True)

    if model_osd is not None:
        model_osd.delete()
    if alba_backend is not None:
        alba_backend.invalidate_dynamics()
        alba_backend.backend.invalidate_dynamics()
    if node.storagerouter is not None:
        DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)
    return [] if disk_data is None else disk_data.get('aliases', [])
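# Illustrative sketch of the expected call order for remove_asd; the GUIDs/IDs are made up.
# The safety is calculated up front and passed along, so remove_asd can verify that removing
# the ASD does not degrade the backend beyond what the caller anticipated.
expected_safety = AlbaController.calculate_safety(alba_backend_guid='66666666-7777-8888-9999-000000000000',
                                                  removal_osd_ids=['asd-id-123'])
AlbaNodeController.remove_asd(node_guid='aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                              asd_id='asd-id-123',
                              expected_safety=expected_safety)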
def test_nsm_checkup_external(self): """ Validates whether the NSM checkup works for externally managed Arakoon clusters """ Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 1) Configuration.set('/ovs/framework/plugins/alba/config|nsm.maxload', 10) structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2, 3]}) alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]}) alba_backend = alba_structure['alba_backends'][1] storagerouter_1 = structure['storagerouters'][1] storagerouter_2 = structure['storagerouters'][2] # Validate some logic for externally managed arakoons during NSM checkup with self.assertRaises(ValueError) as raise_info: AlbaArakoonController.nsm_checkup(external_nsm_cluster_names=['test']) # No ALBA Backend specified self.assertEqual(first=str(raise_info.exception), second='Additional NSMs can only be configured for a specific ALBA Backend') with self.assertRaises(ValueError) as raise_info: AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, min_internal_nsms=2, external_nsm_cluster_names=['test']) self.assertEqual(first=str(raise_info.exception), second="'min_internal_nsms' and 'external_nsm_cluster_names' are mutually exclusive") with self.assertRaises(ValueError) as raise_info: # noinspection PyTypeChecker AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names={}) # NSM cluster names must be a list self.assertEqual(first=str(raise_info.exception), second="'external_nsm_cluster_names' must be of type 'list'") with self.assertRaises(ValueError) as raise_info: AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=['non-existing-cluster']) # non-existing cluster names should raise self.assertEqual(first=str(raise_info.exception), second="Arakoon cluster with name non-existing-cluster does not exist") # Create an external ABM and NSM Arakoon cluster external_abm_1 = 'backend_1-abm' external_nsm_1 = 'backend_1-nsm_0' external_nsm_2 = 'backend_1-nsm_1' for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems(): arakoon_installer = ArakoonInstaller(cluster_name=cluster_name) arakoon_installer.create_cluster(cluster_type=cluster_type, ip=storagerouter_1.ip, base_dir='/tmp', internal=False) arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip, base_dir='/tmp') arakoon_installer.start_cluster() arakoon_installer.unclaim_cluster() self.assertDictEqual(d1={'cluster_name': cluster_name, 'cluster_type': cluster_type, 'internal': False, 'in_use': False}, d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)) # Let the 'add_cluster` claim the externally managed clusters and model the services Logger._logs = {} AlbaController.add_cluster(alba_backend_guid=alba_backend.guid, abm_cluster=external_abm_1, nsm_clusters=[external_nsm_1]) # Only claim external_nsm_1 for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems(): arakoon_installer = ArakoonInstaller(cluster_name=cluster_name) self.assertDictEqual(d1={'cluster_name': cluster_name, 'cluster_type': cluster_type, 'internal': False, 'in_use': False if cluster_name == external_nsm_2 else True}, d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)) log_found = False for log_record in Logger._logs.get('lib', []): if 'NSM load OK' in log_record: log_found = True break 
self.assertTrue(expr=log_found) self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services)) self.assertEqual(first=1, second=len(alba_backend.nsm_clusters)) self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services)) self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter) self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter) self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config'], ['add_nsm_host', 'backend_1-nsm_0'], ['update_maintenance_config','--eviction-type-random'], ['update_maintenance_config','enable-auto-cleanup-deleted-namespaces-days']]) # Add cluster already invokes a NSM checkup, so nothing should have changed VirtualAlbaBackend.run_log['backend_1-abm'] = [] AlbaArakoonController.nsm_checkup() self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm']) # Overload the only NSM and run NSM checkup. This should log a critical message, but change nothing VirtualAlbaBackend.data['backend_1-abm']['nsms'][0]['namespaces_count'] = 25 Logger._logs = {} AlbaArakoonController.nsm_checkup() log_found = False for log_record in Logger._logs.get('lib', []): if 'All NSM clusters are overloaded' in log_record: log_found = True break self.assertTrue(expr=log_found) self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services)) self.assertEqual(first=1, second=len(alba_backend.nsm_clusters)) self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services)) self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter) self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter) self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm']) # Validate a maximum of 50 NSMs can be deployed current_nsms = [nsm_cluster.number for nsm_cluster in alba_backend.nsm_clusters] alba_structure = AlbaDalHelper.build_dal_structure( structure={'alba_nsm_clusters': [(1, 50)]}, # (<abackend_id>, <amount_of_nsm_clusters>) previous_structure=alba_structure ) # Try to add 1 additional NSM with self.assertRaises(ValueError) as raise_info: AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2]) self.assertEqual(first=str(raise_info.exception), second='The maximum of 50 NSM Arakoon clusters will be exceeded. 
Amount of clusters that can be deployed for this ALBA Backend: 0') # Remove the unused NSM clusters again for nsm_cluster in alba_structure['alba_nsm_clusters'][1][len(current_nsms):]: for nsm_service in nsm_cluster.nsm_services: nsm_service.delete() nsm_service.service.delete() nsm_cluster.delete() # Try to add a previously claimed NSM cluster with self.assertRaises(ValueError) as raise_info: AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_1]) # The provided cluster_name to claim has already been claimed self.assertEqual(first=str(raise_info.exception), second='Some of the provided cluster_names have already been claimed before') # Add a 2nd NSM cluster AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2]) self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services)) self.assertEqual(first=2, second=len(alba_backend.nsm_clusters)) self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services)) self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[1].nsm_services)) self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter) self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter) self.assertIsNone(obj=alba_backend.nsm_clusters[1].nsm_services[0].service.storagerouter) self.assertListEqual(list1=[['add_nsm_host', 'backend_1-nsm_1']], list2=VirtualAlbaBackend.run_log['backend_1-abm']) for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems(): arakoon_installer = ArakoonInstaller(cluster_name=cluster_name) self.assertDictEqual(d1={'cluster_name': cluster_name, 'cluster_type': cluster_type, 'internal': False, 'in_use': True}, d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))