def list(self, discover=False, ip=None, node_id=None):
    """
    Lists all available ALBA Nodes
    :param discover: If True and IP provided, return list of single ALBA node, If True and no IP provided,
                     return all ALBA nodes else return modeled ALBA nodes
    :param ip: IP of ALBA node to retrieve
    :param node_id: ID of the ALBA node
    :raises RuntimeError: When the parameter combination is invalid or the discovered node does not match
    """
    if discover is False and (ip is not None or node_id is not None):
        raise RuntimeError('Discover is mutually exclusive with IP and nodeID')
    if (ip is None and node_id is not None) or (ip is not None and node_id is None):
        raise RuntimeError('Both IP and nodeID need to be specified')
    if discover is False:
        return AlbaNodeList.get_albanodes()

    if ip is not None:
        # Single-node discovery: build a volatile node from config mgmt and validate its identity
        node = AlbaNode(volatile=True)
        node.ip = ip
        node.type = 'ASD'
        node.node_id = node_id
        node.port = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(node_id))
        node.username = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(node_id))
        node.password = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(node_id))
        data = node.client.get_metadata()
        if data['_success'] is False and data['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        if data['node_id'] != node_id:
            # Fixed: this message was split over a raw newline inside the string literal
            raise RuntimeError('Unexpected node identifier. {0} vs {1}'.format(data['node_id'], node_id))
        node_list = DataList(AlbaNode, {})
        node_list._executed = True
        node_list._guids = [node.guid]
        node_list._objects = {node.guid: node}
        node_list._data = {node.guid: {'guid': node.guid, 'data': node._data}}
        return node_list

    # Full discovery: every ASD node in config mgmt that is not modeled yet
    nodes = {}
    model_node_ids = [node.node_id for node in AlbaNodeList.get_albanodes()]
    found_node_ids = []
    asd_node_ids = []
    if EtcdConfiguration.dir_exists('/ovs/alba/asdnodes'):
        asd_node_ids = EtcdConfiguration.list('/ovs/alba/asdnodes')
    for node_id in asd_node_ids:
        node = AlbaNode(volatile=True)
        node.type = 'ASD'
        node.node_id = node_id
        node.ip = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|ip'.format(node_id))
        node.port = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(node_id))
        node.username = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(node_id))
        node.password = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(node_id))
        if node.node_id not in model_node_ids and node.node_id not in found_node_ids:
            nodes[node.guid] = node
            found_node_ids.append(node.node_id)
    node_list = DataList(AlbaNode, {})
    node_list._executed = True
    node_list._guids = nodes.keys()
    node_list._objects = nodes
    node_list._data = dict([(node.guid, {'guid': node.guid, 'data': node._data}) for node in nodes.values()])
    return node_list
def get_node_by_id(node_id):
    """
    Look up a single ASD node by its identifier.
    :param node_id: ID of the ASD node
    :return: ASD node information
    """
    node = AlbaNodeList.get_albanode_by_node_id(node_id=node_id)
    return node
def discover_nodes(cls):
    # type: () -> Dict[str, AlbaNode]
    """
    Discover nodes by querying the config mgmt
    :return: The discovered nodes, mapped by their guid
    :rtype: Dict[str, AlbaNode]
    """
    discovered = {}
    known_ids = set(n.node_id for n in AlbaNodeList.get_albanodes())
    seen_ids = set()
    type_to_path = {AlbaNode.NODE_TYPES.ASD: ASD_NODE_BASE_PATH,
                    AlbaNode.NODE_TYPES.S3: S3_NODE_BASE_PATH}
    for node_type, base_config_path in type_to_path.iteritems():
        if not Configuration.dir_exists(base_config_path):
            continue
        for node_id in Configuration.list(base_config_path):
            # Skip nodes that are already modeled or already discovered in this run
            if node_id in known_ids or node_id in seen_ids:
                continue
            node = cls.model_volatile_node(node_id, node_type)
            discovered[node.guid] = node
            seen_ids.add(node.node_id)
    return discovered
def model_albanodes(**kwargs):
    """
    Sync every ALBA node known in the config platform into the model.
    Already-modeled nodes are refreshed with the latest main config.
    :param kwargs: Kwargs containing information regarding the node
    :type kwargs: dict
    :return: None
    :rtype: NoneType
    """
    _ = kwargs
    if not Configuration.dir_exists('/ovs/alba/asdnodes'):
        return
    for node_id in Configuration.list('/ovs/alba/asdnodes'):
        node = AlbaNodeList.get_albanode_by_node_id(node_id)
        if node is None:
            node = AlbaNode()
        main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        node.type = 'ASD'
        node.node_id = node_id
        node.ip = main_config['ip']
        node.port = main_config['port']
        node.username = main_config['username']
        node.password = main_config['password']
        node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
        node.save()
def get_albanode_by_node_id(alba_node_id):
    """
    Fetches the alba node object with the specified id
    :param alba_node_id: id of the alba node
    :return: The matching AlbaNode (or None when no node carries this id)
    """
    result = AlbaNodeList.get_albanode_by_node_id(alba_node_id)
    return result
def _merge_downtime_information_alba(cls):
    """
    Called when the 'Update' button in the GUI is pressed
    This call merges the downtime and prerequisite information present in the
    'package_information' property for each ALBA Node DAL object
    :return: Information about prerequisites not met and downtime issues
    :rtype: dict
    """
    cls._logger.debug('Retrieving downtime and prerequisite information for ALBA plugin')
    merged_update_info = {}
    for alba_node in AlbaNodeList.get_albanodes():
        for component_name, component_info in alba_node.package_information.iteritems():
            # One shared entry per component, lazily created on first sight
            entry = merged_update_info.setdefault(component_name, {'downtime': [], 'prerequisites': []})
            for downtime in component_info['downtime']:
                if downtime not in entry['downtime']:
                    entry['downtime'].append(downtime)
            for prerequisite in component_info['prerequisites']:
                if prerequisite not in entry['prerequisites']:
                    entry['prerequisites'].append(prerequisite)
    cls._logger.debug('Retrieved downtime and prerequisite information for ALBA plugin: {0}'.format(merged_update_info))
    return merged_update_info
def package_install_sdm(package_info, components):
    """
    Update the SDM packages
    :param package_info: Information about the packages (installed, candidate)
    :type package_info: dict
    :param components: Components which have been selected for update
    :type components: list
    :return: None
    """
    if 'alba' not in components:
        return

    # Only SDM packages are handled by this hook
    packages_to_install = {}
    for pkg_name, pkg_info in package_info.items():
        if pkg_name in AlbaUpdateController.sdm_packages:
            packages_to_install[pkg_name] = pkg_info
    if not packages_to_install:
        return

    AlbaUpdateController._logger.debug('Executing hook {0}'.format(inspect.currentframe().f_code.co_name))
    for pkg_name, pkg_info in packages_to_install.items():
        for alba_node in AlbaNodeList.get_albanodes():
            AlbaUpdateController._logger.debug('{0}: Updating SDM package {1} ({2} --> {3})'.format(alba_node.ip, pkg_name, pkg_info['installed'], pkg_info['candidate']))
            try:
                alba_node.client.execute_update(pkg_name)
            except requests.ConnectionError as ce:
                # This error is thrown due the post-update code of the SDM package which restarts the asd-manager service
                if 'Connection aborted.' not in ce.message:
                    # Fixed: 'raise' must be a bare statement; it was mangled together with the debug call below
                    raise
            AlbaUpdateController._logger.debug('{0}: Updated SDM package {1}'.format(alba_node.ip, pkg_name))
    AlbaUpdateController._logger.debug('Executed hook {0}'.format(inspect.currentframe().f_code.co_name))
def register(node_id):
    """
    Adds a Node with a given node_id to the model
    :param node_id: ID of the ALBA node
    :type node_id: str
    :return: None
    """
    node = AlbaNodeList.get_albanode_by_node_id(node_id)
    if node is None:
        # Not modeled yet: build it from the config management main section
        main_config = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        node = AlbaNode()
        node.ip = main_config['ip']
        node.port = main_config['port']
        node.username = main_config['username']
        node.password = main_config['password']
        node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
    metadata = node.client.get_metadata()
    if metadata['_success'] is False and metadata['_error'] == 'Invalid credentials':
        raise RuntimeError('Invalid credentials')
    if metadata['node_id'] != node_id:
        AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(metadata['node_id'], node_id))
        raise RuntimeError('Unexpected node identifier')
    node.node_id = node_id
    node.type = 'ASD'
    node.save()

    # increase maintenance agents count for all nodes by 1
    for backend in AlbaBackendList.get_albabackends():
        agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(backend.guid)
        if EtcdConfiguration.exists(agents_key):
            EtcdConfiguration.set(agents_key, int(EtcdConfiguration.get(agents_key) + 1))
        else:
            EtcdConfiguration.set(agents_key, 1)
    AlbaNodeController.checkup_maintenance_agents()
def _presets(self): """ Returns the policies active on the node """ if len(self.abm_services) == 0: return [] # No ABM services yet, so backend not fully installed yet asds = {} if self.scaling != AlbaBackend.SCALINGS.GLOBAL: for node in AlbaNodeList.get_albanodes(): asds[node.node_id] = 0 for disk in self.local_stack[node.node_id].values(): for asd_info in disk['asds'].values(): if asd_info['status'] in ['claimed', 'warning']: asds[node.node_id] += 1 config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name)) presets = AlbaCLI.run(command='list-presets', config=config) preset_dict = {} for preset in presets: preset_dict[preset['name']] = preset if 'in_use' not in preset: preset['in_use'] = True if 'is_default' not in preset: preset['is_default'] = False preset['is_available'] = False preset['policies'] = [tuple(policy) for policy in preset['policies']] preset['policy_metadata'] = {} active_policy = None for policy in preset['policies']: is_available = False available_disks = 0 if self.scaling != AlbaBackend.SCALINGS.GLOBAL: available_disks += sum(min(asds[node], policy[3]) for node in asds) if self.scaling != AlbaBackend.SCALINGS.LOCAL: available_disks += sum(self.local_summary['devices'].values()) if available_disks >= policy[2]: if active_policy is None: active_policy = policy is_available = True preset['policy_metadata'][policy] = {'is_active': False, 'in_use': False, 'is_available': is_available} preset['is_available'] |= is_available if active_policy is not None: preset['policy_metadata'][active_policy]['is_active'] = True for namespace in self.ns_data: if namespace['namespace']['state'] != 'active': continue policy_usage = namespace['statistics']['bucket_count'] preset = preset_dict[namespace['namespace']['preset_name']] for usage in policy_usage: upolicy = tuple(usage[0]) # Policy as reported to be "in use" for cpolicy in preset['policies']: # All configured policies if upolicy[0] == cpolicy[0] and upolicy[1] == cpolicy[1] 
and upolicy[3] <= cpolicy[3]: preset['policy_metadata'][cpolicy]['in_use'] = True break for preset in presets: preset['policies'] = [str(policy) for policy in preset['policies']] for key in preset['policy_metadata'].keys(): preset['policy_metadata'][str(key)] = preset['policy_metadata'][key] del preset['policy_metadata'][key] return presets
def register(node_id):
    """
    Adds a Node with a given node_id to the model
    :param node_id: ID of the ALBA node
    :type node_id: str
    :return: None
    """
    node = AlbaNodeList.get_albanode_by_node_id(node_id)
    if node is None:
        # Unknown node: create it from the main section in config management
        main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        node = AlbaNode()
        node.ip = main_config['ip']
        node.port = main_config['port']
        node.username = main_config['username']
        node.password = main_config['password']
        node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
    metadata = node.client.get_metadata()
    if metadata['_success'] is False and metadata['_error'] == 'Invalid credentials':
        raise RuntimeError('Invalid credentials')
    if metadata['node_id'] != node_id:
        AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(metadata['node_id'], node_id))
        raise RuntimeError('Unexpected node identifier')
    node.node_id = node_id
    node.type = 'ASD'
    node.save()
    AlbaController.checkup_maintenance_agents.delay()
def initialise_disks(alba_backend, nr_of_disks, disk_type):
    """
    Initialize disks
    :param alba_backend: ALBA backend
    :param nr_of_disks: Amount of disks to initialize
    :param disk_type: Type of disks
    :return: True when enough disks are already initialized, otherwise None after initializing
    """
    # Assume no disks are claimed by a remote environment
    alba_backend.invalidate_dynamics(['storage_stack'])
    storage_stack = alba_backend.storage_stack

    initialised_count = 0
    candidate_names = []
    for disks in storage_stack.values():
        for disk_id, disk in disks.iteritems():
            status = disk['status']
            if status == 'initialized':
                initialised_count += 1
            elif status == 'uninitialized':
                candidate_names.append(disk_id)

    remaining = nr_of_disks - initialised_count
    if remaining <= 0:
        return True
    assert len(candidate_names) >= remaining, "Not enough disks to initialize!"

    disks_to_init = GeneralAlba.filter_disks(candidate_names, remaining, disk_type)
    assert len(disks_to_init) >= remaining, "Not enough disks to initialize!"

    grid_ip = General.get_config().get('main', 'grid_ip')
    alba_node = AlbaNodeList.get_albanode_by_ip(grid_ip)
    failures = AlbaNodeController.initialize_disks(alba_node.guid, dict((disk_id, 1) for disk_id in disks_to_init))
    assert not failures, 'Alba disk initialization failed for (some) disks: {0}'.format(failures)
def register_node(node_cluster_guid, node_id=None, node_ids=None):
    # type: (str, str, List[str]) -> None
    """
    Register a AlbaNode to the AlbaNodeCluster
    :param node_cluster_guid: Guid of the AlbaNodeCluster to add the node to
    :type node_cluster_guid: basestring
    :param node_id: ID of the ALBA node to register
    :type node_id: basestring
    :param node_ids: List of IDs of AlbaNodes to register
    :type node_ids: list[str]
    :return: None
    :rtype: NoneType
    :raises ValueError: When neither node_id nor node_ids is given, or when any node failed to register
    """
    if all(x is None for x in [node_id, node_ids]):
        raise ValueError('Either node_id or node_ids must be given')
    if node_ids is None:
        node_ids = [node_id]
    an_cluster = AlbaNodeCluster(node_cluster_guid)
    messages = []
    for node_id in node_ids:
        try:
            an_node = AlbaNodeList.get_albanode_by_node_id(node_id)
            if an_node is None:
                messages.append('No AlbaNode found with ID {0}'.format(node_id))
                continue
            # Validation: a node with claimed OSDs cannot be linked to a cluster
            for slot_id, slot_info in an_node.stack.iteritems():
                for osd_id, osd_info in slot_info['osds'].iteritems():
                    claimed_by = osd_info.get('claimed_by')
                    if claimed_by is not None:  # Either UNKNOWN or a GUID:
                        if claimed_by == AlbaNode.OSD_STATUSES.UNKNOWN:
                            # Fixed: this message was split over a raw newline inside the string literal
                            raise RuntimeError('Unable to link AlbaNode {0}. No information could be retrieved about OSD {1}'.format(node_id, osd_id))
                        raise RuntimeError('Unable to link AlbaNode {0} because it already has OSDs which are claimed'.format(node_id))
            try:
                AlbaNodeClusterController.register_node_to_cluster(an_cluster.guid, an_node.node_id)
            except Exception:
                message = 'Unable to register the node under cluster'
                AlbaNodeClusterController._logger.exception(message)
                messages.append(message)
                continue
            an_node.alba_node_cluster = an_cluster
            an_node.save()
        except Exception:
            message = 'Unhandled Exception occurred during the registering of AlbaNode with id {0} under AlbaNodeCluster {1}'.format(node_id, node_cluster_guid)
            messages.append(message)
            AlbaNodeClusterController._logger.exception(message)
    if len(messages) > 0:
        raise ValueError('Errors occurred while registering AlbaNodes with IDs {0}:\n - {1}'.format(node_ids, '\n - '.join(messages)))
def get_albanode_by_ip(ip):
    """
    Fetches an albanode object by ip
    :param ip: ip of the node
    :type ip: str
    :return: ovs.dal.hybrids.albanode.AlbaNode
    """
    match = AlbaNodeList.get_albanode_by_ip(ip)
    return match
def _get_update_information_plugin_alba(cls, error_information):
    """
    Called by GenericController.refresh_package_information() every hour
    Retrieve and store the update information for all AlbaNodes
    :param error_information: Dict passed in by the thread to collect all errors
    :type error_information: dict
    :return: None
    :rtype: NoneType
    """
    cls._logger.info('Refreshing ALBA plugin update information')
    error_count = 0
    for alba_node in AlbaNodeList.get_albanodes():
        if alba_node.type == AlbaNode.NODE_TYPES.GENERIC:
            continue
        node_ip = alba_node.ip
        cls._logger.debug('ALBA Node {0}: Refreshing update information'.format(node_ip))
        if node_ip not in error_information:
            error_information[node_ip] = []
        try:
            update_info = alba_node.client.get_package_information()
            cls._logger.debug('ALBA Node {0}: Update information: {1}'.format(node_ip, update_info))
            # Iterate a deep copy so components can be dropped from the live dict while looping
            for component, info in copy.deepcopy(update_info).iteritems():
                if len(info['packages']) == 0:
                    update_info.pop(component)
            cls._logger.debug('ALBA Node {0}: Storing update information: {1}'.format(node_ip, update_info))
            alba_node.package_information = update_info
            alba_node.save()
            cls._logger.debug('ALBA Node {0}: Refreshed update information'.format(node_ip))
        except (requests.ConnectionError, requests.Timeout):
            error_count += 1
            cls._logger.warning('ALBA Node {0}: Update information could not be updated'.format(node_ip))
            error_information[node_ip].append('Connection timed out or connection refused on {0}'.format(node_ip))
        except Exception as ex:
            error_count += 1
            cls._logger.exception('ALBA Node {0}: Update information could not be updated'.format(node_ip))
            error_information[node_ip].append(ex)
    if error_count == 0:
        cls._logger.info('Refreshed ALBA plugin update information')
def _presets(self):
    """
    Returns the policies active on the node
    :return: Preset definitions annotated with availability and usage metadata
    :rtype: list
    """
    all_disks = self.all_disks
    # Count usable (claimed/warning) disks per node
    disks = {}
    for node in AlbaNodeList.get_albanodes():
        disks[node.node_id] = 0
        for disk in all_disks:
            if disk['node_id'] == node.node_id and disk['status'] in ['claimed', 'warning']:
                disks[node.node_id] += 1
    config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name)
    presets = AlbaCLI.run('list-presets', config=config, as_json=True)
    preset_dict = {}
    for preset in presets:
        preset_dict[preset['name']] = preset
        if 'in_use' not in preset:
            preset['in_use'] = True
        if 'is_default' not in preset:
            preset['is_default'] = False
        preset['is_available'] = False
        preset['policies'] = [tuple(policy) for policy in preset['policies']]
        preset['policy_metadata'] = {}
        active_policy = None
        for policy in preset['policies']:
            is_available = False
            available_disks = sum(min(disks[node], policy[3]) for node in disks)
            if available_disks >= policy[2]:
                if active_policy is None:
                    active_policy = policy
                is_available = True
            preset['policy_metadata'][policy] = {'is_active': False, 'in_use': False, 'is_available': is_available}
            preset['is_available'] |= is_available
        if active_policy is not None:
            preset['policy_metadata'][active_policy]['is_active'] = True
    for namespace in self.ns_data:
        if namespace['namespace']['state'] != 'active':
            continue
        policy_usage = namespace['statistics']['bucket_count']
        preset = preset_dict[namespace['namespace']['preset_name']]
        for usage in policy_usage:
            upolicy = tuple(usage[0])  # Policy as reported to be "in use"
            for cpolicy in preset['policies']:  # All configured policies
                if upolicy[0] == cpolicy[0] and upolicy[1] == cpolicy[1] and upolicy[3] <= cpolicy[3]:
                    preset['policy_metadata'][cpolicy]['in_use'] = True
                    break
    # Stringify tuple keys so the result is JSON-serializable
    for preset in presets:
        preset['policies'] = [str(policy) for policy in preset['policies']]
        for key in preset['policy_metadata'].keys():
            preset['policy_metadata'][str(key)] = preset['policy_metadata'][key]
            del preset['policy_metadata'][key]
    # Fixed: 'return' and 'presets' were split across lines in the source, which returned None
    return presets
def ipmi_check(cls, result_handler):
    """
    Check the IPMI power status of every ALBA node
    :param result_handler: logging object
    :type result_handler: ovs.extensions.healthcheck.result.HCResults
    :return: None
    """
    for albanode in AlbaNodeList.get_albanodes():
        node_id = albanode.node_id
        ipmi_config_loc = '/ovs/alba/asdnodes/{0}/config/ipmi'.format(node_id)
        if not Configuration.exists(ipmi_config_loc):
            result_handler.skip('No IPMI info found on AlbaNode with ID {0}'.format(node_id))
            continue
        ipmi_config = Configuration.get(ipmi_config_loc)
        ip = ipmi_config.get('ip')
        try:
            controller = IPMIController(ip=ip,
                                        username=ipmi_config.get('username'),
                                        password=ipmi_config.get('password'),
                                        client=SSHClient(System.get_my_storagerouter()))
        except Exception:  # Fixed: bare 'except:' also swallowed SystemExit/KeyboardInterrupt
            result_handler.failure('IPMI settings are not valid for AlbaNode with ID {0}'.format(node_id))
            continue
        try:
            status = controller.status_node().get(ip)
            if status == IPMIController.IPMI_POWER_ON:
                result_handler.success('IPMI AlbaNode with ID {0} status is POWER ON'.format(node_id))
            elif status == IPMIController.IPMI_POWER_OFF:
                result_handler.warning('IPMI AlbaNode with ID {0} status is POWER OFF'.format(node_id))
        except IPMITimeOutException as ex:
            result_handler.failure("IPMI AlbaNode with ID {0} timed out: '{1}'".format(node_id, ex))
        except IPMICallException as ex:
            result_handler.failure("IPMI AlbaNode with ID {0} call failed: '{1}'".format(node_id, ex))
        except Exception:
            msg = 'Could not retrieve info through IPMI for AlbaNode with ID {0}'.format(node_id)
            cls.logger.exception(msg)
            result_handler.exception(msg)
def filter_disks(disk_names, amount, disk_type):
    """
    Filter the available disks
    :param disk_names: Disks to filter: dict with node_ids as keys and lists of uninitialised disk names as values
    :param amount: Amount to retrieve
    :param disk_type: Type of disk ('SATA' or 'SSD')
    :return: Dict with node_ids as keys and lists of /dev/disk/by-id paths as values
    """
    node_ids = []
    list_of_available_disks = {}
    filtered_disks = {}
    disk_count = 0
    # disk_names = dictionary with node_ids as keys and values as a list of uninitialised disk names
    # {u'InA44YDJTKxFGvIKqD3CxYMlK7XxryZ0': [u'ata-TOSHIBA_MK2002TSKB_52Q2KSOTF',
    #                                        u'ata-TOSHIBA_MK2002TSKB_52Q3KR6TF', ...]}
    for node_id in disk_names:
        node_ids.append(node_id)
        list_of_available_disks[node_id] = []
        filtered_disks[node_id] = []
        alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
        storagerouter = GeneralStorageRouter.get_storage_router_by_ip(ip=alba_node.ip)
        root_client = SSHClient(storagerouter, username='******')
        hdds, ssds = GeneralDisk.get_physical_disks(client=root_client)
        if disk_type == 'SATA':
            for hdd in hdds.values():
                # add it to list_of_available_disks only if it's found in the uninitialised list for that node
                if hdd['name'] in disk_names[node_id]:
                    list_of_available_disks[node_id].append(hdd)
        if disk_type == 'SSD':
            for ssd in ssds.values():
                # add it to list_of_available_disks only if it's found in the uninitialised list for that node
                if ssd['name'] in disk_names[node_id]:
                    list_of_available_disks[node_id].append(ssd)
        disk_count += len(list_of_available_disks[node_id])
    count = 0
    # all disks might be on a single node so we are going with the check to max of what we need
    for disk_index in range(amount):
        for node_id in node_ids:
            # if we still need disks we will add all disks found at the count value index in the list_of_available_disks disk lists
            if count < amount:
                if disk_index < len(list_of_available_disks[node_id]):
                    filtered_disks[node_id].append('/dev/disk/by-id/' + list_of_available_disks[node_id][disk_index]['name'])
                    count += 1
    # this should run through the whole list even if we haven't reached the amount of disks needed
    # (fixed: the comment above was split over a raw newline in the source)
    return filtered_disks
def _merge_package_information_alba(cls):
    """
    Retrieve the information stored in the 'package_information' property on the ALBA Node DAL object
    This actually returns all information stored in the 'package_information' property including
    downtime info, prerequisites, services, ...
    The caller of this function will strip out and merge the relevant package information
    :return: Update information for all ALBA Nodes
    :rtype: dict
    """
    cls._logger.debug('Retrieving package information for ALBA plugin')
    update_info = dict((alba_node.ip, alba_node.package_information)
                       for alba_node in AlbaNodeList.get_albanodes()
                       if alba_node.type != AlbaNode.NODE_TYPES.GENERIC)
    cls._logger.debug('Retrieved package information for ALBA plugin')
    return update_info
def register(node_id=None, node_type=None, name=None):
    """
    Adds a Node with a given node_id to the model
    :param node_id: ID of the ALBA node
    :type node_id: str
    :param node_type: Type of the node to create
    :type node_type: str
    :param name: Optional name of the node
    :type name: str
    :return: None
    :rtype: NoneType
    """
    if node_type == AlbaNode.NODE_TYPES.GENERIC:
        # Generic is a special case. Nothing is registered within config mgmt
        node = AlbaNode()
        node.name = name
        node.node_id = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(32))
        node.type = AlbaNode.NODE_TYPES.GENERIC
        node.save()
    else:
        # Both S3 and ASD type can be added now
        if node_id is None:
            raise RuntimeError('A node_id must be given for type ASD/S3')
        node = AlbaNodeList.get_albanode_by_node_id(node_id) or AlbaNodeController.get_discovered_node(node_id)
        if not node:
            # No node could be found in the model or within the discovered nodes. User might have
            # specified the ID of a node that does not exist
            raise RuntimeError('No node with node_id {0} was found'.format(node_id))
        data = node.client.get_metadata()
        if data['_success'] is False and data['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        if data['node_id'] != node_id:
            AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
            raise RuntimeError('Unexpected node identifier')
        if node.type == AlbaNode.NODE_TYPES.S3:
            # The transaction Arakoon is needed. This wil check deployment & extend
            AlbaArakoonController.configure_s3_transaction_cluster()
        node.volatile = False
        node.save()
        # NOTE(review): source indentation was lost; the delay() below follows node.save() in the
        # else-branch here -- confirm whether it should run for GENERIC registrations as well
        AlbaController.checkup_maintenance_agents.delay()
def add_units(self, albabackend, osds):
    """
    Add storage units to the backend and register with alba nsm
    DEPRECATED API call - Use 'add_osds' instead
    :param albabackend: ALBA backend to add units to
    :type albabackend: AlbaBackend
    :param osds: Dict of osd_id as key, disk_id as value
    :type osds: Dict
    :return: Asynchronous result of a CeleryTask
    :rtype: celery.result.AsyncResult
    """
    # Currently backwards compatible, should be removed at some point
    # Map the old call data ({osd_id: disk_id}) onto the slot-based layout
    osd_type = 'ASD'
    osd_info = []
    stack = None
    for osd_id, disk_alias in osds.iteritems():
        slot_id = disk_alias.split('/')[-1]
        if stack is None:
            # Add units is pushed for a single ALBA Node so the stack should be fetched once
            for alba_node in AlbaNodeList.get_albanodes():
                node_stack = alba_node.stack
                if slot_id in node_stack:
                    stack = node_stack
                    break
        if stack is None:
            raise HttpNotAcceptableException(error='stack_not_found',
                                             error_description='Could not find the matching stack for slot with ID {0}'.format(slot_id))
        slot_osd = stack[slot_id]['osds'].get(osd_id)
        if slot_osd is None:
            raise HttpNotFoundException(error='osd_not_found',
                                        error_description='Could not find OSD {0} on Slot {1}'.format(osd_id, slot_id))
        osd_info.append({'slot_id': slot_id,
                         'osd_type': osd_type,
                         'ips': slot_osd['ips'],
                         'port': slot_osd['port']})
    return AlbaController.add_osds.s(albabackend.guid, osd_info).apply_async(queue='ovs_masters')
def _local_stack(self): """ Returns a live list of all disks known to this AlbaBackend """ if self.abm_cluster is None: return {} # No ABM cluster yet, so backend not fully installed yet # Load information from node osd_statistics = self.osd_statistics def _load_live_info(_node, _storage_map): node_id = _node.node_id _storage_map[node_id] = {} for slot_id, _slot_data in _node.stack.iteritems(): # Pre-fill some info _storage_map[node_id][slot_id] = { 'osds': {}, 'name': slot_id, 'status': 'error', 'status_detail': 'unknown' } # Extend the OSD info with the usage information for osd_id, osd_data in _slot_data.get('osds', {}).iteritems(): if osd_id in osd_statistics: stats = osd_statistics[osd_id] osd_data['usage'] = { 'size': int(stats['capacity']), 'used': int(stats['disk_usage']), 'available': int(stats['capacity'] - stats['disk_usage']) } _storage_map[node_id][slot_id].update(_slot_data) threads = [] storage_map = {} for node in AlbaNodeList.get_albanodes(): thread = Thread(target=_load_live_info, args=(node, storage_map)) thread.start() threads.append(thread) for thread in threads: thread.join() return storage_map
def unregister_node(node_cluster_guid, node_id):
    # type: (str, str) -> None
    """
    Unregisters an AlbaNode from the AlbaNodeCluster
    This will update the cluster to no longer work with active/passive
    :param node_cluster_guid: Guid of the AlbaNodeCluster to add the node to
    :type node_cluster_guid: basestring
    :param node_id: ID of the ALBA node to register
    :type node_id: basestring
    :return: None
    :rtype: NoneType
    :raises NotImplementedError: Always, after clearing the relation (follow-up actions not implemented)
    """
    # Fixed: the type comment said '(str) -> None' although the function takes two parameters
    _ = node_cluster_guid
    an_node = AlbaNodeList.get_albanode_by_node_id(node_id)
    an_node.alba_node_cluster = None
    an_node.save()
    raise NotImplementedError('Actions after removing the relation has not yet been implemented')
def initialise_disks(alba_backend, nr_of_disks, disk_type):
    """
    Initialize disks
    :param alba_backend: ALBA backend
    :param nr_of_disks: Amount of disks to initialize
    :param disk_type: Type of disks
    :return: True when enough disks are already initialized, otherwise None after initializing
    """
    # Assume no disks are claimed by a remote environment
    alba_backend.invalidate_dynamics(['local_stack'])
    local_stack = alba_backend.local_stack

    initialised_disks = 0
    uninitialised_disks = 0
    uninitialized_disk_names = {}
    for disks in local_stack.values():
        for disk_id, disk in disks.iteritems():
            status = disk['status']
            if status == 'initialized':
                initialised_disks += 1
            elif status == 'uninitialized':
                uninitialised_disks += 1
                # Group the uninitialised disk names per owning node
                uninitialized_disk_names.setdefault(disk['node_id'], []).append(disk_id)

    nr_of_disks_to_init = nr_of_disks - initialised_disks
    if nr_of_disks_to_init <= 0:
        return True
    assert uninitialised_disks >= nr_of_disks_to_init, "Not enough disks to initialize!"

    disks_to_init = GeneralAlba.filter_disks(uninitialized_disk_names, nr_of_disks_to_init, disk_type)
    disks_found = sum(len(disks) for disks in disks_to_init.values())
    assert disks_found >= nr_of_disks_to_init, "Not enough disks to initialize!"

    for node_id, disks in disks_to_init.iteritems():
        alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
        failures = AlbaNodeController.initialize_disks(alba_node.guid, dict(('/dev/disk/by-id/' + disk_id, 1) for disk_id in disks))
        assert not failures,\
            'Alba disk initialization failed for (some) disks: {0}'.format(failures)
def _wait_for_asd_count_with_status(_alba_backend, _nr_of_asds, status):
    """
    Poll the backend's storage stack until at least ``_nr_of_asds`` ASDs report ``status``.
    :param _alba_backend: ALBA backend to poll
    :param _nr_of_asds: Minimum number of ASDs that must have the given status
    :param status: ASD status to look for
    :return: Mapping of asd_id -> disk guid for the matching ASDs
    """
    grid_ip = General.get_config().get('main', 'grid_ip')
    alba_node = AlbaNodeList.get_albanode_by_ip(grid_ip)
    retries_left = GeneralAlba.ALBA_TIMER / GeneralAlba.ALBA_TIMER_STEP
    asds_with_status = {}
    while retries_left > 0:
        GeneralAlba.logger.info('counter: {0}'.format(retries_left))
        _alba_backend.invalidate_dynamics(['storage_stack'])
        node_stack = _alba_backend.storage_stack.get(alba_node.node_id)
        if node_stack is not None:
            for disk_info in node_stack.values():
                for asd_id, asd_info in disk_info['asds'].iteritems():
                    if asd_info['status'] == status:
                        asds_with_status[asd_id] = disk_info.get('guid')
        GeneralAlba.logger.info('looking for {0} asds with status {1}: {2}'.format(_nr_of_asds, status, asds_with_status))
        if len(asds_with_status) >= _nr_of_asds:
            break
        retries_left -= 1
        time.sleep(GeneralAlba.ALBA_TIMER_STEP)
    assert len(asds_with_status) >= _nr_of_asds,\
        "Unable to find {0} asds, only found {1} asds with status: {2}.\n".format(_nr_of_asds, len(asds_with_status), status)
    return asds_with_status
def get_package_information_alba_plugin_storage_nodes(information):
    """
    Called by GenericController.refresh_package_information() every hour
    Retrieve and store the package information for all AlbaNodes
    :param information: Dict (shared with other hooks) to collect errors per node IP
    :return: None
    """
    for alba_node in AlbaNodeList.get_albanodes():
        # Make sure an 'errors' list exists for this node, whatever the dict held before
        node_errors = information.setdefault(alba_node.ip, {}).setdefault('errors', [])
        try:
            alba_node.package_information = alba_node.client.get_package_information()
            alba_node.save()
        except (requests.ConnectionError, requests.Timeout):
            AlbaUpdateController._logger.warning('Update information for Alba Node with IP {0} could not be updated'.format(alba_node.ip))
            node_errors.append('Connection timed out or connection refused on {0}'.format(alba_node.ip))
        except Exception as ex:
            node_errors.append(ex)
def model_albanodes(**kwargs):
    """
    Add all ALBA nodes known to the config platform to the model
    :param kwargs: Kwargs containing information regarding the node
    :type kwargs: dict
    :return: None
    """
    _ = kwargs
    # Nothing to model when the config platform has no ASD node directory
    if not Configuration.dir_exists('/ovs/alba/asdnodes'):
        return
    for asd_node_id in Configuration.list('/ovs/alba/asdnodes'):
        # Re-use the existing model object when the node was modeled before
        alba_node = AlbaNodeList.get_albanode_by_node_id(asd_node_id)
        if alba_node is None:
            alba_node = AlbaNode()
        config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(asd_node_id))
        alba_node.type = 'ASD'
        alba_node.node_id = asd_node_id
        alba_node.ip = config['ip']
        alba_node.port = config['port']
        alba_node.username = config['username']
        alba_node.password = config['password']
        # Link the node to the StorageRouter sharing its IP (may be None)
        alba_node.storagerouter = StorageRouterList.get_by_ip(config['ip'])
        alba_node.save()
def list(self, discover=False, ip=None, node_id=None):
    """
    Lists all available ALBA Nodes
    :param discover: If True and IP provided, return list of single ALBA node, If True and no IP provided, return all ALBA nodes else return modeled ALBA nodes
    :type discover: bool
    :param ip: IP of ALBA node to retrieve
    :type ip: str
    :param node_id: ID of the ALBA node
    :type node_id: str
    :return: A list of ALBA nodes
    :rtype: ovs.dal.datalist.DataList
    """
    # Parameter validation: ip/node_id only make sense when discovering, and must come as a pair
    if discover is False and (ip is not None or node_id is not None):
        raise HttpNotAcceptableException(
            error='invalid_data',
            error_description=
            'Discover is mutually exclusive with IP and nodeID')
    if (ip is None and node_id is not None) or (ip is not None and node_id is None):
        raise HttpNotAcceptableException(
            error='invalid_data',
            error_description='Both IP and nodeID need to be specified')
    if discover is False:
        # No discovery requested: return the modeled nodes as-is
        return AlbaNodeList.get_albanodes()
    # Discover nodes
    nodes = self._discover_nodes(ip=ip, node_id=node_id)
    # Build the DataList manually: the discovered nodes are not persisted, so the
    # DataList internals (_executed/_guids/_objects/_data) are populated by hand
    # to make the result serialize like a regular query result
    node_list = DataList(AlbaNode)
    node_list._executed = True
    node_list._guids = nodes.keys()
    node_list._objects = nodes
    node_list._data = dict([(node.guid, {
        'guid': node.guid,
        'data': node._data
    }) for node in nodes.values()])
    return node_list
def ipmi_check(cls, result_handler):
    """
    Verify the IPMI power status of every ALBA node that has IPMI configuration.
    Nodes without IPMI config are skipped; all failures are reported per node and
    never abort the loop over the remaining nodes.
    :param result_handler: logging object
    :type result_handler: ovs.extensions.healthcheck.result.HCResults
    :return: None
    """
    for albanode in AlbaNodeList.get_albanodes():
        node_id = albanode.node_id
        ipmi_config_loc = '/ovs/alba/asdnodes/{0}/config/ipmi'.format(node_id)
        if not Configuration.exists(ipmi_config_loc):
            result_handler.skip('No IPMI info found on AlbaNode with ID {0}'.format(node_id))
            continue
        ipmi_config = Configuration.get(ipmi_config_loc)
        ip = ipmi_config.get('ip')
        try:
            controller = IPMIController(ip=ip,
                                        username=ipmi_config.get('username'),
                                        password=ipmi_config.get('password'),
                                        client=SSHClient(System.get_my_storagerouter()))
        # Fix: was a bare 'except:', which also swallowed SystemExit/KeyboardInterrupt
        except Exception:
            result_handler.failure('IPMI settings are not valid for AlbaNode with ID {0}'.format(node_id))
            continue
        try:
            status = controller.status_node().get(ip)
            if status == IPMIController.IPMI_POWER_ON:
                result_handler.success('IPMI AlbaNode with ID {0} status is POWER ON'.format(node_id))
            elif status == IPMIController.IPMI_POWER_OFF:
                result_handler.warning('IPMI AlbaNode with ID {0} status is POWER OFF'.format(node_id))
        except IPMITimeOutException as ex:
            result_handler.failure("IPMI AlbaNode with ID {0} timed out: '{1}'".format(node_id, ex))
        except IPMICallException as ex:
            result_handler.failure("IPMI AlbaNode with ID {0} call failed: '{1}'".format(node_id, ex))
        except Exception:
            msg = 'Could not retrieve info through IPMI for AlbaNode with ID {0}'.format(node_id)
            cls.logger.exception(msg)
            result_handler.exception(msg)
def post_update_alba_plugin_alba(components):
    """
    Execute some functionality after the ALBA plugin packages have been updated
    For alba:
        * Restart arakoon-amb, arakoon-nsm on every client (if present and required)
        * Execute post-update functionality on every ALBA node
    :param components: Update components which have been executed
    :type components: list
    :return: None
    """
    # Only act when the 'alba' component was part of this update run
    if 'alba' not in components:
        return

    # Update ALBA nodes: restart services on every node that has package information
    AlbaUpdateController._logger.debug('Executing hook {0}'.format(inspect.currentframe().f_code.co_name))
    for node in AlbaNodeList.get_albanodes():
        if node.client.get_package_information():
            AlbaUpdateController._logger.debug('{0}: Restarting services'.format(node.ip))
            node.client.restart_services()

    # Renew maintenance services (async via Celery)
    AlbaUpdateController._logger.debug('Checkup maintenance agents')
    AlbaController.checkup_maintenance_agents.delay()
    AlbaUpdateController._logger.debug('Executed hook {0}'.format(inspect.currentframe().f_code.co_name))
def migrate_sdm():
    """
    Executes async migrations for ALBA SDM node
    It doesn't matter too much when they are executed, as long as they get eventually executed.
    This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
    """
    from ovs.dal.lists.albanodelist import AlbaNodeList

    logger = AlbaMigrationController._logger
    logger.info('Preparing out of band migrations for SDM...')
    for node in AlbaNodeList.get_albanodes():
        identifier = node.node_id
        try:
            logger.info('Executing post-update migration code for ALBA Node {0}'.format(identifier))
            node.client.update_execute_migration_code()
        except Exception:
            # A failing node must not block the migration of the remaining nodes
            logger.exception('Executing post-update migration code for ALBA Node {0} failed'.format(identifier))
    logger.info('Finished out of band migrations for SDM')
def checkup_maintenance_agents():
    """
    Check if requested nr of maintenance agents / backend is actually present
    Add / remove as necessary
    :return: None
    """
    service_template_key = 'alba-maintenance_{0}-{1}'
    maintenance_agents_map = {}
    asd_nodes = AlbaNodeList.get_albanodes()
    nr_of_storage_nodes = len(asd_nodes)

    def _get_node_load(backend_name):
        # Determine, per backend, the node running the most/fewest maintenance agents
        # plus the total agent count, by listing maintenance services on every node
        highest_load = 0
        lowest_load = sys.maxint
        agent_load = {'high_load_node': asd_nodes[0] if asd_nodes else None,
                      'low_load_node': asd_nodes[0] if asd_nodes else None,
                      'total_load': 0}
        for asd_node in asd_nodes:
            actual_nr_of_agents = 0
            maint_services = asd_node.client.list_maintenance_services()
            for service_name in maint_services:
                # Match on the 'alba-maintenance_<backend>-' prefix (empty hash suffix)
                if service_template_key.format(backend_name, '') in service_name:
                    actual_nr_of_agents += 1
            if actual_nr_of_agents > highest_load:
                agent_load['high_load_node'] = asd_node
                highest_load = actual_nr_of_agents
            if actual_nr_of_agents < lowest_load:
                agent_load['low_load_node'] = asd_node
                lowest_load = actual_nr_of_agents
            agent_load['total_load'] += actual_nr_of_agents
        return agent_load

    # Build required-vs-actual map per backend; default required count is one agent per storage node
    alba_backends = AlbaBackendList.get_albabackends()
    for alba_backend in alba_backends:
        nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(alba_backend.guid)
        name = alba_backend.backend.name
        if not EtcdConfiguration.exists(nr_of_agents_key):
            EtcdConfiguration.set(nr_of_agents_key, nr_of_storage_nodes)
        required_nr = EtcdConfiguration.get(nr_of_agents_key)
        maintenance_agents_map[name] = {'required': required_nr,
                                        'actual': _get_node_load(name)['total_load'],
                                        'backend': alba_backend.backend}

    for name, values in maintenance_agents_map.iteritems():
        AlbaNodeController._logger.info('Checking backend: {0}'.format(name))
        to_process = values['required'] - values['actual']

        if to_process == 0:
            AlbaNodeController._logger.info('No action required for: {0}'.format(name))
        elif to_process >= 0:  # NOTE(review): '>= 0' is effectively '> 0' since 0 is handled above
            # Too few agents: add one at a time on the currently least-loaded node
            AlbaNodeController._logger.info('Adding {0} maintenance agent(s) for {1}'.format(to_process, name))
            for _ in xrange(to_process):
                unique_hash = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(16))
                node = _get_node_load(name)['low_load_node']
                AlbaNodeController._logger.info('Service to add: ' + service_template_key.format(name, unique_hash))
                if node and node.client:
                    node.client.add_maintenance_service(service_template_key.format(name, unique_hash),
                                                        values['backend'].alba_backend.guid,
                                                        AlbaController.get_abm_service_name(values['backend']))
                    AlbaNodeController._logger.info('Service added')
        else:
            # Too many agents: remove one at a time from the currently most-loaded node
            to_process = abs(to_process)
            AlbaNodeController._logger.info('Removing {0} maintenance agent(s) for {1}'.format(to_process, name))
            for _ in xrange(to_process):
                node = _get_node_load(name)['high_load_node']
                # NOTE(review): node.client is dereferenced before the 'node and node.client' guard — confirm node can never be None here
                services = node.client.list_maintenance_services()
                if services and node and node.client:
                    for service in services:
                        if 'alba-maintenance_' + name in service:
                            node.client.remove_maintenance_service(service)
                            break
def _get_update_information_cluster_alba(cls, client, update_info, package_info):
    """
    In this function the services for each component / package combination are defined
    This service information consists out of:
        * Services to stop (before update) and start (after update of packages) -> 'services_stop_start'
        * Services to restart after update (post-update logic)                  -> 'services_post_update'
        * Down-times which will be caused due to service restarts               -> 'downtime'
        * Prerequisites that have not been met                                  -> 'prerequisites'

    Verify whether all relevant services have the correct binary active
    Whether a service has the correct binary version in use, we use the ServiceFactory.get_service_update_versions functionality
    When a service has an older binary version running, we add this information to the 'update_info'

    This combined information is then stored in the 'package_information' of the StorageRouter DAL object

    :param client: SSHClient on which to retrieve the service information required for an update
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param update_info: Dictionary passed in by the thread calling this function used to store all update information
    :type update_info: dict
    :param package_info: Dictionary containing the components and packages which have an update available for current SSHClient
    :type package_info: dict
    :return: None
    :rtype: NoneType
    """
    cls._logger.info('StorageRouter {0}: Refreshing ALBA update information'.format(client.ip))
    try:
        binaries = cls._package_manager.get_binary_versions(client=client)
        storagerouter = StorageRouterList.get_by_ip(ip=client.ip)
        cls._logger.debug('StorageRouter {0}: Binary versions: {1}'.format(client.ip, binaries))

        # Retrieve Arakoon information: map internal ABM/NSM arakoon service names to
        # their downtime entry (or None when no downtime applies)
        arakoon_info = {}
        for service in storagerouter.services:
            if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                continue

            if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                cluster_name = service.abm_service.abm_cluster.name
                alba_backend_name = service.abm_service.abm_cluster.alba_backend.name
            else:
                cluster_name = service.nsm_service.nsm_cluster.name
                alba_backend_name = service.nsm_service.nsm_cluster.alba_backend.name

            cls._logger.debug('StorageRouter {0}: Retrieving update information for Arakoon cluster {1}'.format(client.ip, cluster_name))
            arakoon_update_info = ArakoonInstaller.get_arakoon_update_info(cluster_name=cluster_name)
            cls._logger.debug('StorageRouter {0}: Arakoon update information for cluster {1}: {2}'.format(client.ip, cluster_name, arakoon_update_info))
            if arakoon_update_info['internal'] is True:
                arakoon_info[arakoon_update_info['service_name']] = ['backend', alba_backend_name] if arakoon_update_info['downtime'] is True else None

        # Per component, inspect every related package and build up the update entry
        for component, package_names in PackageFactory.get_package_info()['names'].iteritems():
            package_names = sorted(package_names)
            cls._logger.debug('StorageRouter {0}: Validating component {1} and related packages: {2}'.format(client.ip, component, package_names))

            if component not in update_info[client.ip]:
                update_info[client.ip][component] = copy.deepcopy(ServiceFactory.DEFAULT_UPDATE_ENTRY)
            svc_component_info = update_info[client.ip][component]
            pkg_component_info = package_info.get(component, {})

            for package_name in package_names:
                cls._logger.debug('StorageRouter {0}: Validating ALBA plugin related package {1}'.format(client.ip, package_name))
                if package_name == PackageFactory.PKG_OVS_BACKEND and package_name in pkg_component_info:
                    # Backend package update implies GUI/API downtime and a framework stop-start cycle
                    if ['gui', None] not in svc_component_info['downtime']:
                        svc_component_info['downtime'].append(['gui', None])
                    if ['api', None] not in svc_component_info['downtime']:
                        svc_component_info['downtime'].append(['api', None])
                    svc_component_info['services_stop_start'][10].append('ovs-watcher-framework')
                    svc_component_info['services_stop_start'][20].append('memcached')
                    cls._logger.debug('StorageRouter {0}: Added services "ovs-watcher-framework" and "memcached" to stop-start services'.format(client.ip))
                    cls._logger.debug('StorageRouter {0}: Added GUI and API to downtime'.format(client.ip))

                elif package_name in [PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE]:
                    # Retrieve proxy service information
                    for service in storagerouter.services:
                        if service.type.name != ServiceType.SERVICE_TYPES.ALBA_PROXY or service.alba_proxy is None:
                            continue
                        # Only check the running binary when the package itself has no pending update
                        service_version = None
                        if package_name not in pkg_component_info:
                            service_version = ServiceFactory.get_service_update_versions(client=client, service_name=service.name, binary_versions=binaries)
                        cls._logger.debug('StorageRouter {0}: Service {1} is running version {2}'.format(client.ip, service.name, service_version))
                        if package_name in pkg_component_info or service_version is not None:
                            if service_version is not None and package_name not in svc_component_info['packages']:
                                svc_component_info['packages'][package_name] = service_version
                            svc_component_info['services_post_update'][10].append('ovs-{0}'.format(service.name))
                            cls._logger.debug('StorageRouter {0}: Added service {1} to post-update services'.format(client.ip, 'ovs-{0}'.format(service.name)))

                            downtime = ['proxy', service.alba_proxy.storagedriver.vpool.name]
                            if downtime not in svc_component_info['downtime']:
                                svc_component_info['downtime'].append(downtime)
                                cls._logger.debug('StorageRouter {0}: Added ALBA proxy downtime for vPool {1} to downtime'.format(client.ip, service.alba_proxy.storagedriver.vpool.name))

                if package_name in [PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE, PackageFactory.PKG_ARAKOON]:
                    # Internal Arakoon clusters collected above may also need restarts/downtime
                    for service_name, downtime in arakoon_info.iteritems():
                        service_version = ServiceFactory.get_service_update_versions(client=client, service_name=service_name, binary_versions=binaries, package_name=package_name)
                        cls._logger.debug('StorageRouter {0}: Arakoon service {1} information: {2}'.format(client.ip, service_name, service_version))

                        if package_name in pkg_component_info or service_version is not None:
                            svc_component_info['services_post_update'][10].append('ovs-{0}'.format(service_name))
                            cls._logger.debug('StorageRouter {0}: Added service {1} to post-update services'.format(client.ip, 'ovs-{0}'.format(service_name)))
                            if service_version is not None and package_name not in svc_component_info['packages']:
                                svc_component_info['packages'][package_name] = service_version
                            if downtime is not None and downtime not in svc_component_info['downtime']:
                                svc_component_info['downtime'].append(downtime)
                                cls._logger.debug('StorageRouter {0}: Added Arakoon cluster for ALBA Backend {1} to downtime'.format(client.ip, downtime[1]))

                # Extend the service information with the package information related to this repository for current StorageRouter
                if package_name in pkg_component_info and package_name not in svc_component_info['packages']:
                    cls._logger.debug('StorageRouter {0}: Adding package {1} because it has an update available'.format(client.ip, package_name))
                    svc_component_info['packages'][package_name] = pkg_component_info[package_name]

            if component == PackageFactory.COMP_ALBA:
                # Unresponsive ALBA nodes block the update: record them as prerequisites
                for alba_node in AlbaNodeList.get_albanodes():
                    try:
                        alba_node.client.get_metadata()
                    except:
                        svc_component_info['prerequisites'].append(['alba_node_unresponsive', alba_node.ip])
                        cls._logger.debug('StorageRouter {0}: Added unresponsive ALBA Node {1} to prerequisites'.format(client.ip, alba_node.ip))

            # Verify whether migration (DAL and extension) code needs to be executed (only if no packages have an update available so far)
            elif component == PackageFactory.COMP_FWK and PackageFactory.PKG_OVS_BACKEND not in svc_component_info['packages']:
                cls._logger.debug('StorageRouter {0}: No updates detected, checking for required migrations'.format(client.ip))
                # Extension migration check
                key = '/ovs/framework/hosts/{0}/versions'.format(System.get_my_machine_id(client=client))
                old_version = Configuration.get(key, default={}).get(PackageFactory.COMP_MIGRATION_ALBA)
                installed_version = str(cls._package_manager.get_installed_versions(client=client, package_names=[PackageFactory.PKG_OVS_BACKEND])[PackageFactory.PKG_OVS_BACKEND])
                migrations_detected = False
                if old_version is not None:
                    cls._logger.debug('StorageRouter {0}: Current running version for {1} extension migrations: {2}'.format(client.ip, PackageFactory.COMP_ALBA, old_version))
                    with remote(client.ip, [ExtensionMigrator]) as rem:
                        cls._logger.debug('StorageRouter {0}: Available version for {1} extension migrations: {2}'.format(client.ip, PackageFactory.COMP_ALBA, rem.ExtensionMigrator.THIS_VERSION))
                        if rem.ExtensionMigrator.THIS_VERSION > old_version:
                            migrations_detected = True
                            svc_component_info['packages'][PackageFactory.PKG_OVS_BACKEND] = {'installed': 'migrations',
                                                                                              'candidate': installed_version}

                # DAL migration check
                if migrations_detected is False:
                    persistent_client = PersistentFactory.get_client()
                    old_version = persistent_client.get('ovs_model_version').get(PackageFactory.COMP_MIGRATION_ALBA) if persistent_client.exists('ovs_model_version') else None
                    if old_version is not None:
                        cls._logger.debug('StorageRouter {0}: Current running version for {1} DAL migrations: {2}'.format(client.ip, PackageFactory.COMP_ALBA, old_version))
                        with remote(client.ip, [DALMigrator]) as rem:
                            cls._logger.debug('StorageRouter {0}: Available version for {1} DAL migrations: {2}'.format(client.ip, PackageFactory.COMP_ALBA, rem.DALMigrator.THIS_VERSION))
                            if rem.DALMigrator.THIS_VERSION > old_version:
                                svc_component_info['packages'][PackageFactory.PKG_OVS_BACKEND] = {'installed': 'migrations',
                                                                                                  'candidate': installed_version}

        cls._logger.info('StorageRouter {0}: Refreshed ALBA update information'.format(client.ip))
    except Exception as ex:
        # Record the failure in the shared update_info dict so the caller can surface it
        cls._logger.exception('StorageRouter {0}: Refreshing ALBA update information failed'.format(client.ip))
        if 'errors' not in update_info[client.ip]:
            update_info[client.ip]['errors'] = []
        update_info[client.ip]['errors'].append(ex)
def get_update_information_alba_plugin(information):
    """
    Called when the 'Update' button in the GUI is pressed
    This call collects additional information about the packages which can be updated
    Eg:
        * Downtime for Arakoons
        * Downtime for StorageDrivers
        * Prerequisites that haven't been met
        * Services which will be stopped during update
        * Services which will be restarted after update
    :param information: Shared dict (keyed on 'framework'/'alba') this hook extends in-place
    :return: The same `information` dict, extended
    """
    # Verify arakoon info for the framework clusters (config cluster 'cacc' and 'ovsdb')
    arakoon_ovs_info = {'down': False, 'name': None, 'internal': False}
    arakoon_cacc_info = {'down': False, 'name': None, 'internal': False}
    for cluster in ['cacc', 'ovsdb']:
        cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
        if cluster_name is None:
            continue
        if cluster == 'cacc':
            # 'cacc' config lives on the filesystem of the local StorageRouter
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
        else:
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
        if arakoon_metadata['internal'] is True:
            config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
            config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
            if cluster == 'ovsdb':
                # Fewer than 3 nodes means restarting ovsdb causes downtime
                arakoon_ovs_info['down'] = len(config.nodes) < 3
                arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                arakoon_ovs_info['internal'] = True
            else:
                arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                arakoon_cacc_info['internal'] = True

    # Verify StorageRouter downtime: unreachable routers become framework prerequisites
    fwk_prerequisites = []
    all_storagerouters = StorageRouterList.get_storagerouters()
    for storagerouter in all_storagerouters:
        try:
            SSHClient(endpoint=storagerouter, username='******')
        except UnableToConnectException:
            fwk_prerequisites.append(['node_down', storagerouter.name])

    # Verify ALBA node responsiveness
    alba_prerequisites = []
    for alba_node in AlbaNodeList.get_albanodes():
        try:
            alba_node.client.get_metadata()
        except Exception:
            alba_prerequisites.append(['alba_node_unresponsive', alba_node.ip])

    for key in ['framework', 'alba']:
        if key not in information:
            information[key] = {'packages': {},
                                'downtime': [],
                                'prerequisites': fwk_prerequisites if key == 'framework' else alba_prerequisites,
                                'services_stop_start': set(),
                                'services_post_update': set()}
        for storagerouter in StorageRouterList.get_storagerouters():
            if key not in storagerouter.package_information:
                continue

            # Retrieve Arakoon issues for the internal ABM/NSM clusters of this StorageRouter
            arakoon_downtime = []
            arakoon_services = []
            for service in storagerouter.services:
                if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                    continue

                if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                    cluster_name = AlbaController.get_abm_cluster_name(alba_backend=service.abm_service.alba_backend)
                else:
                    cluster_name = AlbaController.get_nsm_cluster_name(alba_backend=service.nsm_service.alba_backend, number=service.nsm_service.number)
                if Configuration.exists('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) is False:
                    continue
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                if arakoon_metadata['internal'] is True:
                    arakoon_services.append('ovs-{0}'.format(service.name))
                    config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                    config.load_config()
                    if len(config.nodes) < 3:
                        # Restarting a < 3-node cluster implies backend downtime
                        if service.type.name == ServiceType.SERVICE_TYPES.NS_MGR:
                            arakoon_downtime.append(['backend', service.nsm_service.alba_backend.name])
                        else:
                            arakoon_downtime.append(['backend', service.abm_service.alba_backend.name])

            for package_name, package_info in storagerouter.package_information[key].iteritems():
                if package_name not in AlbaUpdateController.alba_plugin_packages:
                    continue  # Only gather information for the core packages

                information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                if package_name not in information[key]['packages']:
                    information[key]['packages'][package_name] = {}
                information[key]['packages'][package_name].update(package_info)

                if package_name == 'openvstorage-backend':
                    if ['gui', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['gui', None])
                    if ['api', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['api', None])
                    information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                elif package_name == 'alba':
                    for down in arakoon_downtime:
                        if down not in information[key]['downtime']:
                            information[key]['downtime'].append(down)
                    information[key]['services_post_update'].update(arakoon_services)
                elif package_name == 'arakoon':
                    if key == 'framework':
                        framework_arakoons = set()
                        if arakoon_ovs_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                        if arakoon_cacc_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))

                        information[key]['services_post_update'].update(framework_arakoons)
                        if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['ovsdb', None])
                    else:
                        for down in arakoon_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(arakoon_services)

        for alba_node in AlbaNodeList.get_albanodes():
            for package_name, package_info in alba_node.package_information.get(key, {}).iteritems():
                if package_name not in AlbaUpdateController.sdm_packages:
                    continue  # Only gather information for the SDM packages

                information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                if package_name not in information[key]['packages']:
                    information[key]['packages'][package_name] = {}
                information[key]['packages'][package_name].update(package_info)
    return information
def get_alba_nodes():
    """
    Retrieve all ALBA nodes
    :return: Data-object list of ALBA nodes
    """
    alba_nodes = AlbaNodeList.get_albanodes()
    return alba_nodes
def migrate(previous_version):
    """
    Migrates from a given version to the current version. It uses 'previous_version' to be smart
    wherever possible, but the code should be able to migrate any version towards the expected version.
    When this is not possible, the code can set a minimum version and raise when it is not met.
    :param previous_version: The previous version from which to start the migration
    :type previous_version: float
    :return: The version reached by this migration (ALBAMigrator.THIS_VERSION)
    """
    working_version = previous_version

    if working_version == 0:
        # Initial version:
        # * Add any basic configuration or model entries

        # Add backends
        for backend_type_info in [('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.NS_MGR, ServiceType.SERVICE_TYPES.ALBA_MGR]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

    # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
    elif working_version < ALBAMigrator.THIS_VERSION:
        # Migrate unique constraints: backfill 'ovs_unique_*' index keys for properties
        # marked unique that do not have index entries yet
        import hashlib
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            classname = cls.__name__.lower()
            unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname)
            uniques = []
            # noinspection PyProtectedMember
            for prop in cls._properties:
                if prop.unique is True and len([k for k in client.prefix(unique_key.format(prop.name))]) == 0:
                    uniques.append(prop.name)
            if len(uniques) > 0:
                prefix = 'ovs_data_{0}_'.format(classname)
                for key in client.prefix(prefix):
                    data = client.get(key)
                    for property_name in uniques:
                        # Index key is the unique prefix + sha1 of the property value
                        ukey = '{0}{1}'.format(unique_key.format(property_name), hashlib.sha1(str(data[property_name])).hexdigest())
                        client.set(ukey, key)

        # Changes on AlbaNodes & AlbaDisks
        from ovs.dal.lists.albanodelist import AlbaNodeList
        storagerouter_guids = []
        for alba_node in AlbaNodeList.get_albanodes():
            # StorageRouter - AlbaNode 1-to-many relation changes to 1-to-1
            # Keep the first node linked per StorageRouter, unlink any duplicates
            if alba_node.storagerouter_guid is not None:
                if alba_node.storagerouter_guid in storagerouter_guids:
                    alba_node.storagerouter = None
                    alba_node.save()
                else:
                    storagerouter_guids.append(alba_node.storagerouter_guid)
            # Complete rework of the way we detect devices to assign roles or use as ASD
            # Allow loop-, raid-, nvme-, ??-devices and logical volumes as ASD
            # More info: https://github.com/openvstorage/framework/issues/792
            for alba_disk in alba_node.disks:
                if alba_disk.aliases is not None:
                    continue
                if 'name' in alba_disk._data:
                    alba_disk.aliases = ['/dev/disk/by-id/{0}'.format(alba_disk._data['name'])]
                    alba_disk.save()

    return ALBAMigrator.THIS_VERSION
def _package_install_plugin_alba(cls, components=None):
    """
    Update the packages related to the ASD manager
    :param components: Components which have been selected for update
    :type components: list
    :return: Boolean indicating whether to continue with the update or not
    :rtype: bool
    """
    cls._logger.info('Updating packages for ALBA plugin')
    if components is None:
        components = [PackageFactory.COMP_ALBA]

    abort = False
    # Process ASD nodes in a stable order (sorted numerically on IP)
    alba_nodes = sorted(AlbaNodeList.get_albanodes_by_type(AlbaNode.NODE_TYPES.ASD),
                        key=lambda an: ExtensionsToolbox.advanced_sort(element=an.ip, separator='.'))
    for alba_node in alba_nodes:
        cls._logger.debug('ALBA Node {0}: Verifying packages'.format(alba_node.ip))
        for component in components:
            packages = alba_node.package_information.get(component, {}).get('packages', {})
            package_names = sorted(packages)
            # Always install the extensions package first
            if PackageFactory.PKG_OVS_EXTENSIONS in package_names:
                package_names.remove(PackageFactory.PKG_OVS_EXTENSIONS)
                package_names.insert(0, PackageFactory.PKG_OVS_EXTENSIONS)

            if len(package_names) > 0:
                cls._logger.debug('ALBA Node {0}: Packages for component {1}: {2}'.format(alba_node.ip, component, package_names))
            for package_name in package_names:
                try:
                    installed = packages[package_name]['installed']
                    candidate = packages[package_name]['candidate']

                    if candidate == alba_node.client.update_installed_version_package(package_name=package_name):
                        # Package has already been installed by another hook
                        continue

                    cls._logger.debug('ALBA Node {0}: Updating package {1} ({2} --> {3})'.format(alba_node.ip, package_name, installed, candidate))
                    alba_node.client.execute_update(package_name)
                    cls._logger.debug('ALBA Node {0}: Updated package {1}'.format(alba_node.ip, package_name))
                except requests.ConnectionError as ce:
                    if 'Connection aborted.' not in ce.message:  # This error is thrown due the post-update code of the SDM package which restarts the asd-manager service
                        cls._logger.exception('ALBA Node {0}: Failed to update package {1}'.format(alba_node.ip, package_name))
                        abort = True
                except Exception:
                    cls._logger.exception('ALBA Node {0}: Failed to update package {1}'.format(alba_node.ip, package_name))
                    abort = True

    if abort is False:
        cls._logger.info('Updated packages for ALBA plugin')
    return abort
def get_backend_stats():
    """
    Send backend stats for each backend to InfluxDB
    :return: The list of points which were sent, or None when nothing could be collected
    """
    points = []
    abm_names = []          # ALBA manager (arakoon) service names
    asd_node_ids = []       # fix: was named 'abs', which shadowed the builtin
    for service in ServiceList.get_services():
        if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
            abm_names.append(service.name)
    for alba_node in AlbaNodeList.get_albanodes():
        asd_node_ids.append(alba_node.node_id)
    abm_names = list(set(abm_names))
    if len(abm_names) == 0:
        # fix: previously raised an unhandled IndexError on abm_names[0];
        # follow the function's log-and-return-None error style instead
        StatsmonkeyScheduledTaskController._logger.error('No ALBA manager services found')
        return None

    config = "etcd://127.0.0.1:2379/ovs/arakoon/{}/config".format(abm_names[0])
    try:
        decommissioning_osds = AlbaCLI.run('list-decommissioning-osds', config=config, to_json=True)
    except Exception as ex:
        StatsmonkeyScheduledTaskController._logger.error('{0}'.format(ex.message))
        return None

    # Keep only the decommissioning OSDs that live on one of our ASD nodes
    filtered_osds = []
    for asd_node_id in asd_node_ids:
        filtered_osds += [osd for osd in decommissioning_osds if osd['node_id'] == asd_node_id]

    for alba_backend in AlbaBackendList.get_albabackends():
        try:
            stat = {
                'measurement': 'backend_stats',
                'tags': {
                    'backend_name': alba_backend.name
                },
                'fields': {
                    'gets': alba_backend.statistics['multi_get']['n'],
                    'puts': alba_backend.statistics['apply']['n']
                }
            }
            # Count ASDs per status; 'decommissioned' status_detail overrides the status
            stat_asd = {
                'decommissioning': len(filtered_osds),
                'decommissioned': 0,
                'claimed': 0,
                'warning': 0,
                'failure': 0,
                'error': 0
            }
            for disks in alba_backend.local_stack.values():
                for disk in disks.values():
                    for asd in disk['asds'].values():
                        if asd['alba_backend_guid'] == alba_backend.guid:
                            status = asd['status']
                            status_detail = asd['status_detail']
                            if status_detail == 'decommissioned':
                                status = status_detail
                            if status not in stat_asd:
                                stat_asd[status] = 0
                            stat_asd[status] += 1

            for status in stat_asd:
                stat['fields'][status] = stat_asd[status]
            points.append(stat)
        except Exception as ex:
            # Best-effort per backend: log and continue with the next one
            StatsmonkeyScheduledTaskController._logger.error(ex.message)

    if len(points) == 0:
        StatsmonkeyScheduledTaskController._logger.info("No statistics found")
        return None

    StatsmonkeyScheduledTaskController._send_stats(points)
    return points
def _post_update_alba_plugin_alba(cls, components):
    """
    Execute some functionality after the ALBA plugin packages have been updated for the ASD manager nodes
    :param components: Update components which have been executed
    :type components: list
    :return: None
    :rtype: NoneType
    """
    if PackageFactory.COMP_ALBA not in components:
        return

    # First run post-update migrations to update services, config mgmt, ... and restart services afterwards
    for method_name in ['migrate', 'migrate_sdm']:
        try:
            # noinspection PyUnresolvedReferences
            from ovs.lib.albamigration import AlbaMigrationController
            cls._logger.debug('Executing migration code: AlbaMigrationController.{0}()'.format(method_name))
            getattr(AlbaMigrationController, method_name)()
        except ImportError:
            cls._logger.error('Could not import AlbaMigrationController')
        except Exception:
            cls._logger.exception('Migration code for the ALBA plugin failed to be executed')

    # Update ALBA nodes
    method_name = inspect.currentframe().f_code.co_name
    cls._logger.info('Executing hook {0}'.format(method_name))
    # Process nodes in deterministic (IP-sorted) order
    alba_nodes = sorted(AlbaNodeList.get_albanodes_by_type(AlbaNode.NODE_TYPES.ASD),
                        key=lambda an: ExtensionsToolbox.advanced_sort(element=an.ip, separator='.'))
    for alba_node in alba_nodes:
        services_to_restart = []
        for component in components:
            if component not in alba_node.package_information:
                continue

            component_info = alba_node.package_information[component]
            if 'services_post_update' not in component_info:
                # Package_information still has the old format, so refresh update information
                # This can occur when updating from earlier than 2.11.0 to 2.11.0 and older
                try:
                    GenericController.refresh_package_information()
                except Exception:  # Was a bare 'except:' which also swallowed SystemExit/KeyboardInterrupt
                    cls._logger.exception('{0}: Refreshing package information failed'.format(alba_node.ip))
                alba_node.discard()
                component_info = alba_node.package_information.get(component, {})

            # Restart order keys arrive as strings; cast to int so sorting is numeric
            services_post_update = dict((int(key), value) for key, value in component_info.get('services_post_update', {}).iteritems())
            for restart_order in sorted(services_post_update):
                for service_name in sorted(services_post_update[restart_order]):
                    if service_name not in services_to_restart:
                        services_to_restart.append(service_name)
        if len(services_to_restart) > 0:
            alba_node.client.restart_services(service_names=services_to_restart)

    # Renew maintenance services
    cls._logger.info('Checkup maintenance agents')
    AlbaController.checkup_maintenance_agents.delay()

    cls._logger.info('Executed hook {0}'.format(method_name))
def _all_disks(self):
    """
    Returns a live list of all disks known to this AlbaBackend
    :return: list of disk dicts, each enriched with 'status', 'status_detail' and 'alba_backend_guid'
    """
    from ovs.dal.lists.albanodelist import AlbaNodeList
    from ovs.dal.lists.albabackendlist import AlbaBackendList

    # Map alba_id -> AlbaBackend so OSD ownership can be resolved to a backend guid
    alba_backend_map = {}
    for a_backend in AlbaBackendList.get_albabackends():
        alba_backend_map[a_backend.alba_id] = a_backend
    node_disk_map = {}
    alba_nodes = AlbaNodeList.get_albanodes()
    for node in alba_nodes:
        node_disk_map[node.node_id] = []

    # Load OSDs
    config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name)
    for found_osd in AlbaCLI.run('list-all-osds', config=config, as_json=True):
        node_id = found_osd['node_id']
        if node_id in node_disk_map:
            node_disk_map[node_id].append({'osd': found_osd})

    # Load all_disk information
    def _load_disks(_node, _list):
        # Pair each physical disk with the OSD entry sharing its asd_id; unmatched disks get a disk-only entry
        for _disk in _node.all_disks:
            found = False
            for container in _list:
                if 'osd' in container and container['osd']['long_id'] == _disk.get('asd_id'):
                    container['disk'] = _disk
                    found = True
                    break
            if found is False:
                _list.append({'disk': _disk})
    # One thread per node: each thread writes to its own node list, so no shared-state contention
    threads = []
    for node in alba_nodes:
        thread = Thread(target=_load_disks, args=(node, node_disk_map[node.node_id]))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    # Make mapping between node IDs and the relevant OSDs and disks
    def _process_disk(_info, _disks, _node):
        # Derive the GUI status for a single disk from its model state and matched OSD (if any)
        disk = _info.get('disk')
        if disk is None:
            return
        disk_status = 'uninitialized'
        disk_status_detail = ''
        disk_alba_backend_guid = ''
        if disk['available'] is False:
            osd = _info.get('osd')
            disk_alba_state = disk['state']['state']
            if disk_alba_state == 'ok':
                if osd is None:
                    disk_status = 'initialized'
                elif osd['id'] is None:
                    # OSD known to ALBA but not registered: available unless claimed by some backend
                    alba_id = osd['alba_id']
                    if alba_id is None:
                        disk_status = 'available'
                    else:
                        disk_status = 'unavailable'
                        alba_backend = alba_backend_map.get(alba_id)
                        if alba_backend is not None:
                            disk_alba_backend_guid = alba_backend.guid
                else:
                    # Registered OSD: start pessimistic, then upgrade based on stats/error history
                    disk_status = 'error'
                    disk_status_detail = 'communicationerror'
                    disk_alba_backend_guid = self.guid
                    for asd in _node.asds:
                        if asd.asd_id == disk['asd_id'] and asd.statistics != {}:
                            disk_status = 'warning'
                            disk_status_detail = 'recenterrors'

                    read = osd['read'] or [0]
                    write = osd['write'] or [0]
                    errors = osd['errors']
                    global_interval_key = '/ovs/alba/backends/global_gui_error_interval'
                    backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
                    interval = EtcdConfiguration.get(global_interval_key)
                    if EtcdConfiguration.exists(backend_interval_key):
                        interval = EtcdConfiguration.get(backend_interval_key)
                    # Claimed when no errors, or the oldest recent I/O is newer than the last error + grace interval
                    if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                        disk_status = 'claimed'
                        disk_status_detail = ''
            elif disk_alba_state == 'decommissioned':
                disk_status = 'unavailable'
                disk_status_detail = 'decommissioned'
            else:
                disk_status = 'error'
                disk_status_detail = disk['state']['detail']
                alba_backend = alba_backend_map.get(osd.get('alba_id'))
                if alba_backend is not None:
                    disk_alba_backend_guid = alba_backend.guid
        disk['status'] = disk_status
        disk['status_detail'] = disk_status_detail
        disk['alba_backend_guid'] = disk_alba_backend_guid
        _disks.append(disk)

    def _worker(_queue, _disks):
        # Drain the queue without blocking; Empty signals completion
        while True:
            try:
                item = _queue.get(False)
                _process_disk(item['info'], _disks, item['node'])
            except Empty:
                return

    queue = Queue()
    for node in alba_nodes:
        for info in node_disk_map[node.node_id]:
            queue.put({'info': info, 'node': node})
    disks = []
    threads = []
    # Fixed pool of 5 workers; list.append is thread-safe under the GIL in CPython
    for i in range(5):
        thread = Thread(target=_worker, args=(queue, disks))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()
    return disks
def _local_stack(self):
    """
    Returns a live list of all disks known to this AlbaBackend
    :return: dict {node_id: {disk_id: {'asds': {asd_id: data}, ...disk info...}}}
    """
    from ovs.dal.lists.albanodelist import AlbaNodeList
    from ovs.dal.lists.albabackendlist import AlbaBackendList

    if len(self.abm_services) == 0:
        return {}  # No ABM services yet, so backend not fully installed yet

    # Map alba_id -> AlbaBackend to resolve OSD ownership to a backend guid
    alba_backend_map = {}
    for alba_backend in AlbaBackendList.get_albabackends():
        alba_backend_map[alba_backend.alba_id] = alba_backend

    # Load information based on the model
    asd_map = {}
    storage_map = {}
    alba_nodes = AlbaNodeList.get_albanodes()
    for node in alba_nodes:
        node_id = node.node_id
        storage_map[node_id] = {}
        for disk in node.disks:
            disk_id = disk.aliases[0].split('/')[-1]
            # Default to error/unknown; live and alba info below will overwrite where available
            storage_map[node_id][disk_id] = {'asds': {},
                                             'name': disk_id,
                                             'guid': disk.guid,
                                             'status': 'error',
                                             'aliases': disk.aliases,
                                             'status_detail': 'unknown'}
            for osd in disk.osds:
                osd_id = osd.osd_id
                data = {'asd_id': osd_id,
                        'guid': osd.guid,
                        'status': 'error',
                        'status_detail': 'unknown',
                        'alba_backend_guid': osd.alba_backend_guid}
                # Same dict object is shared by asd_map and storage_map, so updates via either are visible in both
                asd_map[osd_id] = data
                storage_map[node_id][disk_id]['asds'][osd_id] = data

    # Load information from node
    def _load_live_info(_node, _node_data):
        # Merge the node's live storage stack into the modelled data for that node
        _data = _node.storage_stack
        if _data['status'] != 'ok':
            # Node unreachable/unhealthy: propagate the failure reason to all its disks and ASDs
            for disk_entry in _node_data.values():
                disk_entry['status_detail'] = _data['status']
                for entry in disk_entry.get('asds', {}).values():
                    entry['status_detail'] = _data['status']
        else:
            for _disk_id, disk_asd_info in _data['stack'].iteritems():
                if _disk_id not in _node_data:
                    _node_data[_disk_id] = {'asds': {}}
                entry = _node_data[_disk_id]
                # Deep-copy so deleting 'asds' does not mutate the node's cached stack
                disk_info = copy.deepcopy(disk_asd_info)
                del disk_info['asds']
                entry.update(disk_info)
                asds_info = disk_asd_info['asds']
                for _asd_id, asd_info in asds_info.iteritems():
                    if _asd_id not in _node_data[_disk_id]['asds']:
                        _node_data[_disk_id]['asds'][_asd_id] = asd_info
                    else:
                        _node_data[_disk_id]['asds'][_asd_id].update(asd_info)

    # One thread per node; each thread only touches its own node's sub-dict
    threads = []
    for node in alba_nodes:
        thread = Thread(target=_load_live_info, args=(node, storage_map[node.node_id]))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    # Mix in usage information
    for asd_id, stats in self.asd_statistics.iteritems():
        if asd_id in asd_map:
            asd_map[asd_id]['usage'] = {'size': int(stats['capacity']),
                                        'used': int(stats['disk_usage']),
                                        'available': int(stats['capacity'] - stats['disk_usage'])}

    # Load information from alba
    backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
    if Configuration.exists(backend_interval_key):
        interval = Configuration.get(backend_interval_key)
    else:
        interval = Configuration.get('/ovs/alba/backends/global_gui_error_interval')
    config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name))
    asds = {}
    for found_osd in AlbaCLI.run(command='list-all-osds', config=config):
        asds[found_osd['long_id']] = found_osd
    for node_data in storage_map.values():
        for _disk in node_data.values():
            for asd_id, asd_data in _disk['asds'].iteritems():
                if asd_id not in asds:
                    continue
                found_osd = asds[asd_id]
                if 'state' not in asd_data:
                    continue  # No live state was merged in for this ASD; keep defaults
                if found_osd.get('decommissioned') is True:
                    asd_data['status'] = 'unavailable'
                    asd_data['status_detail'] = 'decommissioned'
                    continue
                state = asd_data['state']
                if state == 'ok':
                    if found_osd['id'] is None:
                        # Not registered with any namespace manager: available unless claimed elsewhere
                        alba_id = found_osd['alba_id']
                        if alba_id is None:
                            asd_data['status'] = 'available'
                        else:
                            asd_data['status'] = 'unavailable'
                            alba_backend = alba_backend_map.get(alba_id)
                            if alba_backend is not None:
                                asd_data['alba_backend_guid'] = alba_backend.guid
                    else:
                        # Registered OSD: assume warning, upgrade to claimed if error history is stale
                        asd_data['alba_backend_guid'] = self.guid
                        asd_data['status'] = 'warning'
                        asd_data['status_detail'] = 'recenterrors'

                        read = found_osd['read'] or [0]
                        write = found_osd['write'] or [0]
                        errors = found_osd['errors']
                        # Claimed when no errors, or oldest recent I/O is newer than last error + grace interval
                        if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                            asd_data['status'] = 'claimed'
                            asd_data['status_detail'] = ''
                else:
                    asd_data['status'] = 'error'
                    asd_data['status_detail'] = asd_data.get('state_detail', '')
                    alba_backend = alba_backend_map.get(found_osd.get('alba_id'))
                    if alba_backend is not None:
                        asd_data['alba_backend_guid'] = alba_backend.guid
    return storage_map
def merge_package_information_alba_plugin():
    """
    Retrieve the package information for the ALBA plugin, so the core code can merge it all together
    :return: Package information for ALBA nodes, keyed by node IP
    """
    package_info = {}
    for alba_node in AlbaNodeList.get_albanodes():
        package_info[alba_node.ip] = alba_node.package_information
    return package_info
def _live_status(self):
    """
    Retrieve the live status of the ALBA Backend to be displayed in the 'Backends' page in the GUI based on:
    - Maintenance agents presence
    - Maintenance agents status
    - Disk statuses
    :return: Status as reported by the plugin
    :rtype: str
    """
    if self.backend.status == Backend.STATUSES.INSTALLING:
        return 'installing'

    if self.backend.status == Backend.STATUSES.DELETING:
        return 'deleting'

    # Verify failed disks
    devices = self.local_summary['devices']
    if devices['red'] > 0:
        self._logger.warning('AlbaBackend {0} STATUS set to FAILURE due to {1} failed disks'.format(self.name, devices['red']))
        return AlbaBackend.STATUSES.FAILURE

    # Verify remote OSDs
    remote_errors = False
    linked_backend_warning = False
    for remote_info in self.remote_stack.itervalues():
        if remote_info['error'] == 'unknown' or remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
            message = None
            if remote_info['error'] == 'unknown':
                message = 'unknown remote error info'
            elif remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
                message = 'FAILURE in live_status'
            self._logger.warning('AlbaBackend {0} STATUS set to FAILURE due to OSD {1}: {2} '.format(self.name, remote_info['name'], message))
            return AlbaBackend.STATUSES.FAILURE
        if remote_info['error'] == 'not_allowed':
            remote_errors = True
        if remote_info['live_status'] == AlbaBackend.STATUSES.WARNING:
            linked_backend_warning = True

    # Retrieve ASD and maintenance service information
    def _get_node_information(_node):
        # Runs in a worker thread; mutates the shared dicts/set defined below (CPython GIL makes
        # the individual operations atomic; no explicit locking is used here)
        if _node not in nodes_used_by_this_backend:
            for slot_info in _node.stack.itervalues():
                for osd_info in slot_info['osds'].itervalues():
                    if osd_info['claimed_by'] == self.guid:
                        nodes_used_by_this_backend.add(_node)
                        break
                if _node in nodes_used_by_this_backend:
                    break

        try:
            services = _node.maintenance_services
            if self.name in services:
                for _service_name, _service_status in services[self.name]:
                    services_for_this_backend[_service_name] = _node
                    service_states[_service_name] = _service_status
                    if _node.node_id not in services_per_node:
                        services_per_node[_node.node_id] = 0
                    services_per_node[_node.node_id] += 1
        except Exception:
            pass  # Best effort: an unreachable node simply contributes no service info

    services_for_this_backend = {}
    services_per_node = {}
    service_states = {}
    nodes_used_by_this_backend = set()
    threads = []
    all_nodes = AlbaNodeList.get_albanodes()
    for node in all_nodes:
        thread = Thread(target=_get_node_information, args=(node,))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    zero_services = False
    if len(services_for_this_backend) == 0:
        if len(all_nodes) > 0:
            AlbaBackend._logger.error('AlbaBackend {0} STATUS set to FAILURE due to no maintenance services'.format(self.name))
            return AlbaBackend.STATUSES.FAILURE
        zero_services = True

    # Verify maintenance agents status
    for service_name, node in services_for_this_backend.iteritems():
        try:
            service_status = service_states.get(service_name)
            if service_status is None or service_status != 'active':
                AlbaBackend._logger.error('AlbaBackend {0} STATUS set to FAILURE due to non-running maintenance service(s): {1}'.format(self.name, service_name))
                return AlbaBackend.STATUSES.FAILURE
        except Exception:
            pass

    # Verify maintenance agents presence
    layout_key = '/ovs/alba/backends/{0}/maintenance/agents_layout'.format(self.guid)
    layout = None
    if Configuration.exists(layout_key):
        layout = Configuration.get(layout_key)
        # Discard a layout that is not a list or references no known node
        if not isinstance(layout, list) or not any(node.node_id for node in all_nodes if node.node_id in layout):
            layout = None

    if layout is None:
        # No (valid) explicit layout: expect a configurable number of agents, capped by node usage
        config_key = '/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(self.guid)
        expected_services = 3
        if Configuration.exists(config_key):
            expected_services = Configuration.get(config_key)
        expected_services = min(expected_services, len(nodes_used_by_this_backend)) or 1
        if len(services_for_this_backend) < expected_services:
            AlbaBackend._logger.warning('Live status for backend {0} is "warning": insufficient maintenance services'.format(self.name))
            return AlbaBackend.STATUSES.WARNING
    else:
        # Explicit layout: every listed node must actually run at least one maintenance service
        for node_id in layout:
            if node_id not in services_per_node:
                AlbaBackend._logger.warning('Live status for backend {0} is "warning": invalid maintenance service layout'.format(self.name))
                return AlbaBackend.STATUSES.WARNING

    # Verify local and remote OSDs
    if devices['orange'] > 0:
        AlbaBackend._logger.warning('Live status for backend {0} is "warning": one or more OSDs in warning'.format(self.name))
        return AlbaBackend.STATUSES.WARNING

    if remote_errors is True or linked_backend_warning is True:
        AlbaBackend._logger.warning('Live status for backend {0} is "warning": errors/warnings on remote stack'.format(self.name))
        return AlbaBackend.STATUSES.WARNING
    if zero_services is True:
        AlbaBackend._logger.warning('Live status for backend {0} is "warning": no maintenance services'.format(self.name))
        return AlbaBackend.STATUSES.WARNING

    return AlbaBackend.STATUSES.RUNNING
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed.
    This code will typically contain:
    * "dangerous" migration code (it needs certain running services)
    * Migration code depending on a cluster-wide state
    * ...

    Each migration step is guarded by an idempotency flag under /ovs/framework/migration|... which is
    only set to True when the step fully succeeded, so failed steps are retried on the next run.
    """
    AlbaMigrationController._logger.info('Preparing out of band migrations...')

    from ovs.dal.hybrids.diskpartition import DiskPartition
    from ovs.dal.lists.albabackendlist import AlbaBackendList
    from ovs.dal.lists.albanodelist import AlbaNodeList
    from ovs.dal.lists.albaosdlist import AlbaOSDList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException
    from ovs.extensions.migration.migration.albamigrator import ExtensionMigrator
    from ovs.extensions.packages.albapackagefactory import PackageFactory
    from ovs.extensions.services.albaservicefactory import ServiceFactory
    from ovs.extensions.plugins.albacli import AlbaCLI, AlbaError
    from ovs.lib.alba import AlbaController
    from ovs.lib.disk import DiskController

    AlbaMigrationController._logger.info('Start out of band migrations...')

    #############################################
    # Introduction of IP:port combination on OSDs
    osd_info_map = {}
    alba_backends = AlbaBackendList.get_albabackends()
    for alba_backend in alba_backends:
        AlbaMigrationController._logger.info('Verifying ALBA Backend {0}'.format(alba_backend.name))
        if alba_backend.abm_cluster is None:
            AlbaMigrationController._logger.warning('ALBA Backend {0} does not have an ABM cluster registered'.format(alba_backend.name))
            continue

        AlbaMigrationController._logger.debug('Retrieving configuration path for ALBA Backend {0}'.format(alba_backend.name))
        try:
            config = Configuration.get_configuration_path(alba_backend.abm_cluster.config_location)
        except:
            AlbaMigrationController._logger.exception('Failed to retrieve the configuration path for ALBA Backend {0}'.format(alba_backend.name))
            continue

        AlbaMigrationController._logger.info('Retrieving OSD information for ALBA Backend {0}'.format(alba_backend.name))
        try:
            osd_info = AlbaCLI.run(command='list-all-osds', config=config)
        except (AlbaError, RuntimeError):
            AlbaMigrationController._logger.exception('Failed to retrieve OSD information for ALBA Backend {0}'.format(alba_backend.name))
            continue

        # NOTE(review): the loop variable shadows the iterable 'osd_info'; harmless here since
        # the list is fully consumed, but worth renaming at some point
        for osd_info in osd_info:
            if osd_info.get('long_id'):
                osd_info_map[osd_info['long_id']] = {'ips': osd_info.get('ips', []),
                                                     'port': osd_info.get('port')}

    for osd in AlbaOSDList.get_albaosds():
        if osd.osd_id not in osd_info_map:
            AlbaMigrationController._logger.warning('OSD with ID {0} is modelled but could not be found through ALBA'.format(osd.osd_id))
            continue

        ips = osd_info_map[osd.osd_id]['ips']
        port = osd_info_map[osd.osd_id]['port']
        # Only fill in missing values; never overwrite already-modelled IPs/port
        changes = False
        if osd.ips is None:
            changes = True
            osd.ips = ips
        if osd.port is None:
            changes = True
            osd.port = port
        if changes is True:
            AlbaMigrationController._logger.info('Updating OSD with ID {0} with IPS {1} and port {2}'.format(osd.osd_id, ips, port))
            osd.save()

    ###################################################
    # Read preference for GLOBAL ALBA Backends (1.10.3)  (https://github.com/openvstorage/framework-alba-plugin/issues/452)
    if Configuration.get(key='/ovs/framework/migration|read_preference', default=False) is False:
        try:
            name_backend_map = dict((alba_backend.name, alba_backend) for alba_backend in alba_backends)
            for alba_node in AlbaNodeList.get_albanodes():
                AlbaMigrationController._logger.info('Processing maintenance services running on ALBA Node {0} with ID {1}'.format(alba_node.ip, alba_node.node_id))
                alba_node.invalidate_dynamics('maintenance_services')
                for alba_backend_name, services in alba_node.maintenance_services.iteritems():
                    if alba_backend_name not in name_backend_map:
                        AlbaMigrationController._logger.error('ALBA Node {0} has services for an ALBA Backend {1} which is not modelled'.format(alba_node.ip, alba_backend_name))
                        continue

                    alba_backend = name_backend_map[alba_backend_name]
                    AlbaMigrationController._logger.info('Processing {0} ALBA Backend {1} with GUID {2}'.format(alba_backend.scaling, alba_backend.name, alba_backend.guid))
                    if alba_backend.scaling == alba_backend.SCALINGS.LOCAL:
                        read_preferences = [alba_node.node_id]
                    else:
                        read_preferences = AlbaController.get_read_preferences_for_global_backend(alba_backend=alba_backend,
                                                                                                 alba_node_id=alba_node.node_id,
                                                                                                 read_preferences=[])

                    # Move the shared maintenance config to a per-service config, adding read_preference
                    for service_name, _ in services:
                        AlbaMigrationController._logger.info('Processing service {0}'.format(service_name))
                        old_config_key = '/ovs/alba/backends/{0}/maintenance/config'.format(alba_backend.guid)
                        new_config_key = '/ovs/alba/backends/{0}/maintenance/{1}/config'.format(alba_backend.guid, service_name)
                        if Configuration.exists(key=old_config_key):
                            new_config = Configuration.get(key=old_config_key)
                            new_config['read_preference'] = read_preferences
                            Configuration.set(key=new_config_key, value=new_config)
            # Old shared configs are only removed after all per-service configs have been written
            for alba_backend in alba_backends:
                Configuration.delete(key='/ovs/alba/backends/{0}/maintenance/config'.format(alba_backend.guid))
            AlbaController.checkup_maintenance_agents.delay()

            Configuration.set(key='/ovs/framework/migration|read_preference', value=True)
        except Exception:
            AlbaMigrationController._logger.exception('Updating read preferences for ALBA Backends failed')

    #######################################################
    # Storing actual package name in version files (1.11.0)  (https://github.com/openvstorage/framework/issues/1876)
    changed_clients = set()
    storagerouters = StorageRouterList.get_storagerouters()
    if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file_alba', default=False) is False:
        try:
            service_manager = ServiceFactory.get_manager()
            alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
            for storagerouter in storagerouters:
                try:
                    root_client = SSHClient(endpoint=storagerouter.ip, username='******')  # Use '.ip' instead of StorageRouter object because this code is executed during post-update at which point the heartbeat has not been updated for some time
                except UnableToConnectException:
                    AlbaMigrationController._logger.exception('Updating actual package name for version files failed on StorageRouter {0}'.format(storagerouter.ip))
                    continue

                for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                    if not file_name.endswith('.version'):
                        continue
                    file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                    contents = root_client.file_read(filename=file_path)
                    if alba_pkg_name == PackageFactory.PKG_ALBA_EE and '{0}='.format(PackageFactory.PKG_ALBA) in contents:
                        # Rewrite the version file in the RUN_FILE_DIR
                        contents = contents.replace(PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE)
                        root_client.file_write(filename=file_path, contents=contents)

                        # Regenerate the service and update the EXTRA_VERSION_CMD in the configuration management
                        service_name = file_name.split('.')[0]
                        service_config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                        if Configuration.exists(key=service_config_key):
                            service_config = Configuration.get(key=service_config_key)
                            if 'EXTRA_VERSION_CMD' in service_config:
                                service_config['EXTRA_VERSION_CMD'] = '{0}=`{1}`'.format(alba_pkg_name, alba_version_cmd)
                                Configuration.set(key=service_config_key, value=service_config)
                                service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name='ovs-{0}'.format(service_name))  # Leave out .version
                                changed_clients.add(root_client)
            Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file_alba', value=True)
        except Exception:
            AlbaMigrationController._logger.exception('Updating actual package name for version files failed')

    # Reload systemd units on every host whose service files were regenerated above
    for root_client in changed_clients:
        try:
            root_client.run(['systemctl', 'daemon-reload'])
        except Exception:
            AlbaMigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

    ####################################
    # Fix for migration version (1.11.0)
    # Previous code could potentially store a higher version number in the config management than the actual version number
    if Configuration.get(key='/ovs/framework/migration|alba_migration_version_fix', default=False) is False:
        try:
            for storagerouter in storagerouters:
                config_key = '/ovs/framework/hosts/{0}/versions'.format(storagerouter.machine_id)
                if Configuration.exists(key=config_key):
                    versions = Configuration.get(key=config_key)
                    if versions.get(PackageFactory.COMP_MIGRATION_ALBA, 0) > ExtensionMigrator.THIS_VERSION:
                        versions[PackageFactory.COMP_MIGRATION_ALBA] = ExtensionMigrator.THIS_VERSION
                        Configuration.set(key=config_key, value=versions)
            Configuration.set(key='/ovs/framework/migration|alba_migration_version_fix', value=True)
        except Exception:
            AlbaMigrationController._logger.exception('Updating migration version failed')

    ####################################
    # Enable auto-cleanup
    migration_auto_cleanup_key = '/ovs/framework/migration|alba_auto_cleanup'
    if Configuration.get(key=migration_auto_cleanup_key, default=False) is False:
        try:
            for storagerouter in StorageRouterList.get_storagerouters():
                storagerouter.invalidate_dynamics('features')  # New feature was added
            errors = []
            for alba_backend in AlbaBackendList.get_albabackends():
                try:
                    AlbaController.set_auto_cleanup(alba_backend.guid)
                except Exception as ex:
                    AlbaMigrationController._logger.exception('Failed to set the auto-cleanup for ALBA Backend {0}'.format(alba_backend.name))
                    errors.append(ex)
            # Flag is only set when every backend succeeded, so failures are retried next run
            if len(errors) == 0:
                Configuration.set(key=migration_auto_cleanup_key, value=True)
        except Exception:
            AlbaMigrationController._logger.exception('Updating auto cleanup failed')

    ####################################
    # Change cache eviction
    migration_random_eviction_key = '/ovs/framework/migration|alba_cache_eviction_random'
    if Configuration.get(key=migration_random_eviction_key, default=False) is False:
        try:
            errors = []
            for alba_backend in AlbaBackendList.get_albabackends():
                try:
                    AlbaController.set_cache_eviction(alba_backend.guid)
                except Exception as ex:
                    AlbaMigrationController._logger.exception('Failed to set the auto-cleanup for ALBA Backend {0}'.format(alba_backend.name))
                    errors.append(ex)
            if len(errors) == 0:
                Configuration.set(key=migration_random_eviction_key, value=True)
        except Exception:
            AlbaMigrationController._logger.exception('Updating auto cleanup failed')

    ###################################################
    # Sync all disks and apply the backend role. Backend role was removed with the AD (since 1.10)
    albanode_backend_role_sync_key = '/ovs/framework/migration|albanode_backend_role_sync'
    if not Configuration.get(key=albanode_backend_role_sync_key, default=False):
        try:
            errors = []
            for alba_node in AlbaNodeList.get_albanodes():
                try:
                    if not alba_node.storagerouter:
                        continue
                    stack = alba_node.client.get_stack()  # type: dict
                    for slot_id, slot_information in stack.iteritems():
                        osds = slot_information.get('osds', {})  # type: dict
                        slot_aliases = slot_information.get('aliases', [])  # type: list
                        if not osds:  # No osds means no partition was made
                            continue
                        # Sync to add all potential partitions that will need a backend role
                        DiskController.sync_with_reality(storagerouter_guid=alba_node.storagerouter_guid)
                        for disk in alba_node.storagerouter.disks:
                            if set(disk.aliases).intersection(set(slot_aliases)):
                                partition = disk.partitions[0]
                                if DiskPartition.ROLES.BACKEND not in partition.roles:
                                    partition.roles.append(DiskPartition.ROLES.BACKEND)
                                    partition.save()
                except Exception as ex:
                    # NOTE(review): alba_node.storagerouter may be None here, which would itself raise inside this handler
                    AlbaMigrationController._logger.exception('Syncing for storagerouter/albanode {0} failed'.format(alba_node.storagerouter.ip))
                    errors.append(ex)
            if not errors:
                Configuration.set(key=albanode_backend_role_sync_key, value=True)
        except Exception:
            AlbaMigrationController._logger.exception('Syncing up the disks for backend roles failed')

    AlbaMigrationController._logger.info('Finished out of band migrations')
def migrate(previous_version):
    """
    Migrates from a given version to the current version. It uses 'previous_version' to be smart
    wherever possible, but the code should be able to migrate any version towards the expected version.
    When this is not possible, the code can set a minimum version and raise when it is not met.
    :param previous_version: The previous version from which to start the migration
    :type previous_version: float
    """
    # NOTE(review): this code path uses Python 2 semantics in places (e.g. indexing
    # dict.keys() results below) — do not run under Python 3 without porting.
    working_version = previous_version

    if working_version == 0:
        from ovs.dal.hybrids.servicetype import ServiceType
        # Initial version:
        # * Add any basic configuration or model entries

        # Add backends: ensure a BackendType with code 'alba' exists and is named 'ALBA'
        for backend_type_info in [('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            # Name/code are (re)applied even when the type already existed
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types (created unconditionally on the initial version)
        for service_type_info in [ServiceType.SERVICE_TYPES.NS_MGR,
                                  ServiceType.SERVICE_TYPES.ALBA_MGR,
                                  ServiceType.SERVICE_TYPES.ALBA_S3_TRANSACTION]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

    # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
    elif working_version < DALMigrator.THIS_VERSION:
        import hashlib
        from ovs.dal.exceptions import ObjectNotFoundException
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.hybrids.albaabmcluster import ABMCluster
        from ovs.dal.hybrids.albaosd import AlbaOSD
        from ovs.dal.hybrids.albansmcluster import NSMCluster
        from ovs.dal.hybrids.j_abmservice import ABMService
        from ovs.dal.hybrids.j_nsmservice import NSMService
        from ovs.dal.hybrids.service import Service
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.albabackendlist import AlbaBackendList
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig, ArakoonInstaller
        from ovs.extensions.generic.configuration import Configuration, NotFoundException
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs.extensions.plugins.albacli import AlbaCLI
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        # Migrate unique constraints & indexes:
        # for every hybrid model class, build the 'ovs_unique_*' and 'ovs_index_*' keys
        # for properties marked unique/indexed that do not have them in the KV store yet.
        client = PersistentFactory.get_client()
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            classname = cls.__name__.lower()
            unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname)
            index_prefix = 'ovs_index_{0}|{{0}}|'.format(classname)
            index_key = 'ovs_index_{0}|{{0}}|{{1}}'.format(classname)
            uniques = []
            indexes = []
            # noinspection PyProtectedMember
            for prop in cls._properties:
                # Only (re)build constraints that are entirely absent from the store
                if prop.unique is True and len([k for k in client.prefix(unique_key.format(prop.name))]) == 0:
                    uniques.append(prop.name)
                if prop.indexed is True and len([k for k in client.prefix(index_prefix.format(prop.name))]) == 0:
                    indexes.append(prop.name)
            if len(uniques) > 0 or len(indexes) > 0:
                prefix = 'ovs_data_{0}_'.format(classname)
                for key, data in client.prefix_entries(prefix):
                    for property_name in uniques:
                        # Unique constraint: hash of the property value maps back to the data key
                        ukey = '{0}{1}'.format(unique_key.format(property_name),
                                               hashlib.sha1(str(data[property_name])).hexdigest())
                        client.set(ukey, key)
                    for property_name in indexes:
                        if property_name not in data:
                            continue  # This is the case when there's a new indexed property added.
                        ikey = index_key.format(property_name,
                                                hashlib.sha1(str(data[property_name])).hexdigest())
                        index = list(client.get_multi([ikey], must_exist=False))[0]
                        # Guarded append: assert the current value so a concurrent writer
                        # causes the transaction to fail rather than lose an entry
                        transaction = client.begin_transaction()
                        if index is None:
                            client.assert_value(ikey, None, transaction=transaction)
                            client.set(ikey, [key], transaction=transaction)
                        elif key not in index:
                            client.assert_value(ikey, index[:], transaction=transaction)
                            client.set(ikey, index + [key], transaction=transaction)
                        client.apply_transaction(transaction)

        #############################################
        # Introduction of ABMCluster and NSMCluster #
        #############################################
        # Verify presence of unchanged ALBA Backends
        alba_backends = AlbaBackendList.get_albabackends()
        changes_required = False
        for alba_backend in alba_backends:
            if alba_backend.abm_cluster is None or len(alba_backend.nsm_clusters) == 0:
                changes_required = True
                break

        if changes_required:
            # Retrieve ABM and NSM clusters registered in the configuration management
            abm_cluster_info = []
            nsm_cluster_info = []
            for cluster_name in Configuration.list('/ovs/arakoon'):
                try:
                    metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                    if metadata['cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                        abm_cluster_info.append(metadata)
                    elif metadata['cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                        nsm_cluster_info.append(metadata)
                except NotFoundException:
                    continue  # Cluster without metadata: not relevant for this migration

            # Retrieve NSM Arakoon cluster information (exported node-level config per cluster)
            cluster_arakoon_map = {}
            for cluster_info in abm_cluster_info + nsm_cluster_info:
                cluster_name = cluster_info['cluster_name']
                arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name)
                cluster_arakoon_map[cluster_name] = arakoon_config.export_dict()

            storagerouter_map = dict((storagerouter.machine_id, storagerouter)
                                     for storagerouter in StorageRouterList.get_storagerouters())
            alba_backend_id_map = dict((alba_backend.alba_id, alba_backend)
                                       for alba_backend in alba_backends)
            for cluster_info in abm_cluster_info:
                internal = cluster_info['internal']
                cluster_name = cluster_info['cluster_name']
                config_location = Configuration.get_configuration_path(
                    key=ArakoonClusterConfig.CONFIG_KEY.format(cluster_name))
                try:
                    # Ask the ABM itself which ALBA Backend it belongs to and which NSM hosts it knows
                    alba_id = AlbaCLI.run(command='get-alba-id',
                                          config=config_location,
                                          named_params={'attempts': 3})['id']
                    nsm_hosts = AlbaCLI.run(command='list-nsm-hosts',
                                            config=config_location,
                                            named_params={'attempts': 3})
                except RuntimeError:
                    continue  # ABM unreachable: skip, migration can be retried later
                alba_backend = alba_backend_id_map.get(alba_id)
                if alba_backend is None:  # ALBA Backend with ID not found in model
                    continue
                if alba_backend.abm_cluster is not None and len(alba_backend.nsm_clusters) > 0:  # Clusters already exist
                    continue

                # Create ABM Cluster
                if alba_backend.abm_cluster is None:
                    abm_cluster = ABMCluster()
                    abm_cluster.name = cluster_name
                    abm_cluster.alba_backend = alba_backend
                    abm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(cluster_name)
                    abm_cluster.save()
                else:
                    abm_cluster = alba_backend.abm_cluster

                # Create ABM Services: one per Arakoon node when internally managed,
                # a single port-less service otherwise
                abm_arakoon_config = cluster_arakoon_map[cluster_name]
                abm_arakoon_config.pop('global')
                arakoon_nodes = abm_arakoon_config.keys()  # NOTE(review): indexed below -> Python 2 list semantics
                if internal is False:
                    services_to_create = 1
                else:
                    if set(arakoon_nodes).difference(set(storagerouter_map.keys())):
                        # Some Arakoon nodes are not modeled as StorageRouters; cannot link services
                        continue
                    services_to_create = len(arakoon_nodes)
                for index in range(services_to_create):
                    service = Service()
                    service.name = 'arakoon-{0}-abm'.format(alba_backend.name)
                    service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR)
                    if internal is True:
                        arakoon_node_config = abm_arakoon_config[arakoon_nodes[index]]
                        service.ports = [arakoon_node_config['client_port'],
                                         arakoon_node_config['messaging_port']]
                        service.storagerouter = storagerouter_map[arakoon_nodes[index]]
                    else:
                        service.ports = []
                        service.storagerouter = None
                    service.save()
                    abm_service = ABMService()
                    abm_service.service = service
                    abm_service.abm_cluster = abm_cluster
                    abm_service.save()

                # Create NSM Clusters, ordered by their numeric suffix
                for cluster_index, nsm_host in enumerate(
                        sorted(nsm_hosts,
                               key=lambda host: ExtensionsToolbox.advanced_sort(host['cluster_id'], '_'))):
                    nsm_cluster_name = nsm_host['cluster_id']
                    nsm_arakoon_config = cluster_arakoon_map.get(nsm_cluster_name)
                    if nsm_arakoon_config is None:
                        continue  # NSM known to the ABM but not registered in configuration

                    # Internal clusters encode their number in the name suffix; external ones are enumerated
                    number = cluster_index if internal is False else int(nsm_cluster_name.split('_')[-1])
                    nsm_cluster = NSMCluster()
                    nsm_cluster.name = nsm_cluster_name
                    nsm_cluster.number = number
                    nsm_cluster.alba_backend = alba_backend
                    nsm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(nsm_cluster_name)
                    nsm_cluster.save()

                    # Create NSM Services (same internal/external split as the ABM services above)
                    nsm_arakoon_config.pop('global')
                    arakoon_nodes = nsm_arakoon_config.keys()
                    if internal is False:
                        services_to_create = 1
                    else:
                        if set(arakoon_nodes).difference(set(storagerouter_map.keys())):
                            continue
                        services_to_create = len(arakoon_nodes)
                    for service_index in range(services_to_create):
                        service = Service()
                        service.name = 'arakoon-{0}-nsm_{1}'.format(alba_backend.name, number)
                        service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.NS_MGR)
                        if internal is True:
                            arakoon_node_config = nsm_arakoon_config[arakoon_nodes[service_index]]
                            service.ports = [arakoon_node_config['client_port'],
                                             arakoon_node_config['messaging_port']]
                            service.storagerouter = storagerouter_map[arakoon_nodes[service_index]]
                        else:
                            service.ports = []
                            service.storagerouter = None
                        service.save()
                        nsm_service = NSMService()
                        nsm_service.service = service
                        nsm_service.nsm_cluster = nsm_cluster
                        nsm_service.save()

        # Clean up all junction services no longer linked to an ALBA Backend
        all_nsm_services = [service.nsm_service
                            for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.NS_MGR).services
                            if service.nsm_service.nsm_cluster is None]
        all_abm_services = [service.abm_service
                            for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR).services
                            if service.abm_service.abm_cluster is None]
        for abm_service in all_abm_services:
            abm_service.delete()
            abm_service.service.delete()
        for nsm_service in all_nsm_services:
            nsm_service.delete()
            nsm_service.service.delete()

        ################################
        # Introduction of Active Drive #
        ################################
        # Update slot_id and Alba Node relation for all OSDs; the AlbaDisk hybrid is removed
        client = PersistentFactory.get_client()
        disk_osd_map = {}
        for key, data in client.prefix_entries('ovs_data_albaosd_'):
            alba_disk_guid = data.get('alba_disk', {}).get('guid')
            if alba_disk_guid is not None:
                if alba_disk_guid not in disk_osd_map:
                    disk_osd_map[alba_disk_guid] = []
                # Map disk guid -> list of OSD guids (key minus the data prefix)
                disk_osd_map[alba_disk_guid].append(key.replace('ovs_data_albaosd_', ''))
            try:
                # Strip the now-obsolete 'alba_disk' relation from the raw OSD data
                value = client.get(key)
                value.pop('alba_disk', None)
                client.set(key=key, value=value)
            except Exception:
                pass  # We don't care if we would have any leftover AlbaDisk information in _data, but its cleaner not to

        alba_guid_node_map = dict((an.guid, an) for an in AlbaNodeList.get_albanodes())
        for key, data in client.prefix_entries('ovs_data_albadisk_'):
            alba_disk_guid = key.replace('ovs_data_albadisk_', '')
            alba_node_guid = data.get('alba_node', {}).get('guid')
            if alba_disk_guid in disk_osd_map and alba_node_guid in alba_guid_node_map and len(data.get('aliases', [])) > 0:
                # Derive the slot id from the disk's first alias (last path component)
                slot_id = data['aliases'][0].split('/')[-1]
                for osd_guid in disk_osd_map[alba_disk_guid]:
                    try:
                        osd = AlbaOSD(osd_guid)
                    except ObjectNotFoundException:
                        continue
                    osd.slot_id = slot_id
                    osd.alba_node = alba_guid_node_map[alba_node_guid]
                    osd.save()
            # Remove the AlbaDisk data entry itself (whether or not OSDs were re-linked)
            client.delete(key=key, must_exist=False)

        # Remove unique constraints for AlbaNode IP
        for key in client.prefix('ovs_unique_albanode_ip_'):
            client.delete(key=key, must_exist=False)

        # Remove relation for all Alba Disks
        for key in client.prefix('ovs_reverseindex_albadisk_'):
            client.delete(key=key, must_exist=False)

        # Remove the relation between AlbaNode and AlbaDisk
        for key in client.prefix('ovs_reverseindex_albanode_'):
            if '|disks|' in key:
                client.delete(key=key, must_exist=False)

    return DALMigrator.THIS_VERSION
def _storage_stack(self):
    """
    Returns a live list of all disks known to this AlbaBackend

    Builds a nested mapping: node_id -> disk name -> disk info dict holding an
    'asds' sub-mapping (asd_id -> ASD info dict). Model data is loaded first with
    pessimistic defaults (status 'error' / 'unknown'), then refined with live data
    fetched from every ALBA node in parallel, and finally reconciled with the
    'list-all-osds' output of the ALBA manager itself.
    """
    from ovs.dal.lists.albanodelist import AlbaNodeList
    from ovs.dal.lists.albabackendlist import AlbaBackendList

    if len(self.abm_services) == 0:
        return {}  # No ABM services yet, so backend not fully installed yet

    storage_map = {}
    asd_map = {}  # Flat asd_id -> ASD info dict, shared with the entries in storage_map

    alba_backend_map = {}
    for alba_backend in AlbaBackendList.get_albabackends():
        alba_backend_map[alba_backend.alba_id] = alba_backend

    # Load information based on the model
    alba_nodes = AlbaNodeList.get_albanodes()
    for node in alba_nodes:
        node_id = node.node_id
        storage_map[node_id] = {}
        for disk in node.disks:
            disk_id = disk.name
            # Defaults assume the worst; live info below upgrades the status
            storage_map[node_id][disk_id] = {'name': disk_id,
                                             'guid': disk.guid,
                                             'status': 'error',
                                             'status_detail': 'unknown',
                                             'asds': {}}
            for asd in disk.asds:
                asd_id = asd.asd_id
                data = {'asd_id': asd_id,
                        'guid': asd.guid,
                        'status': 'error',
                        'status_detail': 'unknown',
                        'alba_backend_guid': asd.alba_backend_guid}
                # Same dict object registered in both maps, so later updates show up in both
                asd_map[asd_id] = data
                storage_map[node_id][disk_id]['asds'][asd_id] = data

    # Load information from node
    def _load_live_info(_node, _node_data):
        # Runs in a worker thread; mutates this node's sub-dict of storage_map in place
        # Live disk information
        try:
            disk_data = _node.client.get_disks()
        except (requests.ConnectionError, requests.Timeout):
            # Node unreachable: mark all modeled disks accordingly and carry on
            for entry in _node_data.values():
                entry['status_detail'] = 'nodedown'
            disk_data = {}
        for _disk_id, disk_info in disk_data.iteritems():
            if _disk_id in _node_data:
                entry = _node_data[_disk_id]
            else:
                # Disk known to the node but not yet modeled
                entry = {'name': _disk_id,
                         'status': 'unknown',
                         'status_detail': '',
                         'asds': {}}
                _node_data[_disk_id] = entry
            entry.update(disk_info)
            if disk_info['state'] == 'ok':
                entry['status'] = 'uninitialized' if disk_info['available'] is True else 'initialized'
                entry['status_detail'] = ''
            else:
                entry['status'] = disk_info['state']
                entry['status_detail'] = disk_info.get('state_detail', '')
        # Live ASD information
        try:
            _asd_data = _node.client.get_asds()
        except (requests.ConnectionError, requests.Timeout):
            for disk_entry in _node_data.values():
                for entry in disk_entry['asds'].values():
                    entry['status_detail'] = 'nodedown'
            _asd_data = {}
        for _disk_id, asds in _asd_data.iteritems():
            if _disk_id not in _node_data:
                continue
            for _asd_id, asd_info in asds.iteritems():
                entry = {'asd_id': _asd_id,
                         'status': 'error' if asd_info['state'] == 'error' else 'initialized',
                         'status_detail': asd_info.get('state_detail', ''),
                         'state': asd_info['state'],
                         'state_detail': asd_info.get('state_detail', '')}
                if _asd_id not in _node_data[_disk_id]['asds']:
                    _node_data[_disk_id]['asds'][_asd_id] = entry
                    asd_map[_asd_id] = entry
                else:
                    _node_data[_disk_id]['asds'][_asd_id].update(entry)

    # One thread per node; each thread only touches its own node's sub-dict
    threads = []
    for node in alba_nodes:
        thread = Thread(target=_load_live_info, args=(node, storage_map[node.node_id]))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    # Mix in usage information
    for asd_id, stats in self.asd_statistics.iteritems():
        if asd_id in asd_map:
            asd_map[asd_id]['usage'] = {'size': int(stats['capacity']),
                                        'used': int(stats['disk_usage']),
                                        'available': int(stats['capacity'] - stats['disk_usage'])}

    # Load information from alba
    # Per-backend error interval overrides the global one when present
    backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
    if EtcdConfiguration.exists(backend_interval_key):
        interval = EtcdConfiguration.get(backend_interval_key)
    else:
        interval = EtcdConfiguration.get('/ovs/alba/backends/global_gui_error_interval')
    config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}/config'.format(self.abm_services[0].service.name)
    for found_osd in AlbaCLI.run('list-all-osds', config=config, as_json=True):
        node_id = found_osd['node_id']
        asd_id = found_osd['long_id']
        for _disk in storage_map.get(node_id, {}).values():
            asd_data = _disk['asds'].get(asd_id, {})
            if 'state' not in asd_data:
                continue  # 'state' only present when live ASD info was received above
            if found_osd.get('decommissioned') is True:
                asd_data['status'] = 'unavailable'
                asd_data['status_detail'] = 'decommissioned'
                continue
            state = asd_data['state']
            if state == 'ok':
                if found_osd['id'] is None:
                    # Not claimed by this backend; 'alba_id' tells whether another backend owns it
                    alba_id = found_osd['alba_id']
                    if alba_id is None:
                        asd_data['status'] = 'available'
                    else:
                        asd_data['status'] = 'unavailable'
                        alba_backend = alba_backend_map.get(alba_id)
                        if alba_backend is not None:
                            asd_data['alba_backend_guid'] = alba_backend.guid
                else:
                    # Claimed by this backend; downgrade to 'warning' unless errors are old enough
                    asd_data['alba_backend_guid'] = self.guid
                    asd_data['status'] = 'warning'
                    asd_data['status_detail'] = 'recenterrors'

                    read = found_osd['read'] or [0]
                    write = found_osd['write'] or [0]
                    errors = found_osd['errors']
                    # 'claimed' when there are no errors, or successful I/O happened more than
                    # `interval` seconds after the most recent error (timestamps assumed — TODO confirm)
                    if len(errors) == 0 or (len(read + write) > 0
                                            and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                        asd_data['status'] = 'claimed'
                        asd_data['status_detail'] = ''
            else:
                asd_data['status'] = 'error'
                asd_data['status_detail'] = asd_data.get('state_detail', '')
                alba_backend = alba_backend_map.get(found_osd.get('alba_id'))
                if alba_backend is not None:
                    asd_data['alba_backend_guid'] = alba_backend.guid
    return storage_map