def register(node_id):
    """
    Adds a Node with a given node_id to the model

    When no AlbaNode is modeled yet for the given ID, a new one is built from the
    '/ovs/alba/asdnodes/<node_id>/config/main' configuration. The metadata reported by the
    node is validated, the node is saved and the requested number of maintenance agents
    is raised by one for every ALBA backend.

    :param node_id: ID of the ALBA node
    :type node_id: str
    :raises RuntimeError: When the node rejects the credentials or reports another node ID
    :return: None
    """
    node = AlbaNodeList.get_albanode_by_node_id(node_id)
    if node is None:
        main_config = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        node = AlbaNode()
        node.ip = main_config['ip']
        node.port = main_config['port']
        node.username = main_config['username']
        node.password = main_config['password']
        node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
    data = node.client.get_metadata()
    if data['_success'] is False and data['_error'] == 'Invalid credentials':
        raise RuntimeError('Invalid credentials')
    if data['node_id'] != node_id:
        AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
        raise RuntimeError('Unexpected node identifier')
    node.node_id = node_id
    node.type = 'ASD'
    node.save()

    # Increase the maintenance agents count of every backend by 1
    for backend in AlbaBackendList.get_albabackends():
        nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(backend.guid)
        if EtcdConfiguration.exists(nr_of_agents_key):
            # Convert the stored value before adding: 'int(value + 1)' raises a TypeError
            # as soon as the value is stored as a string
            EtcdConfiguration.set(nr_of_agents_key, int(EtcdConfiguration.get(nr_of_agents_key)) + 1)
        else:
            EtcdConfiguration.set(nr_of_agents_key, 1)
    AlbaNodeController.checkup_maintenance_agents()
def get_client(client_type=None):
    """
    Returns a persistent storage client

    The client is created once and cached on PersistentFactory.store; while a cached
    client exists, client_type is not looked at.

    :param client_type: Type of store client ('pyrakoon', 'arakoon' or 'dummy'). When omitted,
                        the type is read from '/ovs/framework/stores|persistent'
    :raises RuntimeError: When no store is available for the requested type
    :return: The persistent store client
    """
    if getattr(PersistentFactory, 'store', None) is None:
        if getattr(unittest, 'running_tests', False):
            # While unit tests are running, the dummy store is always used
            client_type = 'dummy'
        if client_type is None:
            client_type = EtcdConfiguration.get('/ovs/framework/stores|persistent')

        PersistentFactory.store = None
        if client_type in ['pyrakoon', 'arakoon']:
            from ovs.extensions.storage.persistent.pyrakoonstore import PyrakoonStore
            cluster_name = str(EtcdConfiguration.get('/ovs/framework/arakoon_clusters|ovsdb'))
            PersistentFactory.store = PyrakoonStore(cluster_name)
        elif client_type == 'dummy':
            from ovs.extensions.storage.persistent.dummystore import DummyPersistentStore
            PersistentFactory.store = DummyPersistentStore()

    store = PersistentFactory.store
    if store is None:
        raise RuntimeError('Invalid client_type specified')
    return store
def pulse():
    """
    Update the heartbeats for all Storage Routers

    The 'celery' heartbeat is refreshed for every Storage Router whose celery worker answers
    the ping, the 'process' heartbeat is refreshed for the local Storage Router only. For
    other Storage Routers whose 'process' heartbeat is at least HeartBeat.ARP_TIMEOUT old,
    the ARP cache entry is removed.

    :return: None
    """
    logger = LogHandler.get('extensions', name='heartbeat')
    current_time = int(time.time())
    machine_id = System.get_my_machine_id()

    protocol = EtcdConfiguration.get('/ovs/framework/messagequeue|protocol')
    user = EtcdConfiguration.get('/ovs/framework/messagequeue|user')
    password = EtcdConfiguration.get('/ovs/framework/messagequeue|password')
    host_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
    amqp = '{0}://{1}:{2}@{3}//'.format(protocol, user, password, host_ip)

    celery_path = OSManager.get_path('celery')
    # The perl expression strips ANSI color codes from the celery output
    ping_command = "{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp)
    worker_states = check_output(ping_command, shell=True)

    for storagerouter in StorageRouterList.get_storagerouters():
        if storagerouter.heartbeats is None:
            storagerouter.heartbeats = {}
        if 'celery@{0}: OK'.format(storagerouter.name) in worker_states:
            storagerouter.heartbeats['celery'] = current_time
        if storagerouter.machine_id == machine_id:
            storagerouter.heartbeats['process'] = current_time
        elif storagerouter.heartbeats and 'process' in storagerouter.heartbeats \
                and current_time - storagerouter.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
            # Check timeout of other nodes and clear arp cache
            try:
                check_output("/usr/sbin/arp -d {0}".format(storagerouter.name), shell=True)
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')
        storagerouter.save()
def run_event_consumer():
    """
    Check whether to run the event consumer

    :return: True when one of the configured message queue endpoints starts with the IP of this host
    """
    my_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(System.get_my_machine_id()))
    endpoints = EtcdConfiguration.get('/ovs/framework/messagequeue|endpoints')
    return any(endpoint.startswith(my_ip) for endpoint in endpoints)
def _process_disk(_info, _disks, _node):
    """
    Determines the status of a single disk and appends the disk to the given list

    The keys 'status', 'status_detail' and 'alba_backend_guid' are set on the disk
    dictionary itself. Nothing is appended when _info holds no 'disk'.

    :param _info: Dictionary with a 'disk' entry and optionally an 'osd' entry
    :param _disks: List the processed disk is appended to
    :param _node: Node whose asds are checked for statistics of a claimed disk
    :return: None
    """
    disk = _info.get('disk')
    if disk is None:
        return
    disk_status = 'uninitialized'
    disk_status_detail = ''
    disk_alba_backend_guid = ''
    if disk['available'] is False:
        osd = _info.get('osd')
        disk_alba_state = disk['state']['state']
        if disk_alba_state == 'ok':
            if osd is None:
                disk_status = 'initialized'
            elif osd['id'] is None:
                alba_id = osd['alba_id']
                if alba_id is None:
                    disk_status = 'available'
                else:
                    disk_status = 'unavailable'
                    alba_backend = alba_backend_map.get(alba_id)
                    if alba_backend is not None:
                        disk_alba_backend_guid = alba_backend.guid
            else:
                # The osd has an id: assume a communication error until statistics prove otherwise
                disk_status = 'error'
                disk_status_detail = 'communicationerror'
                disk_alba_backend_guid = self.guid

                for asd in _node.asds:
                    if asd.asd_id == disk['asd_id'] and asd.statistics != {}:
                        disk_status = 'warning'
                        disk_status_detail = 'recenterrors'

                        read = osd['read'] or [0]
                        write = osd['write'] or [0]
                        errors = osd['errors']
                        global_interval_key = '/ovs/alba/backends/global_gui_error_interval'
                        backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
                        interval = EtcdConfiguration.get(global_interval_key)
                        if EtcdConfiguration.exists(backend_interval_key):
                            # A backend specific interval overrules the global one
                            interval = EtcdConfiguration.get(backend_interval_key)
                        # No errors, or the latest error is more than 'interval' older than the
                        # most recent of the oldest listed read and write
                        if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                            disk_status = 'claimed'
                            disk_status_detail = ''
        elif disk_alba_state == 'decommissioned':
            disk_status = 'unavailable'
            disk_status_detail = 'decommissioned'
        else:
            disk_status = 'error'
            disk_status_detail = disk['state']['detail']
            # 'osd' can be missing (the 'ok' branch handles that case too), so guard
            # before dereferencing it
            alba_backend = alba_backend_map.get(osd.get('alba_id')) if osd is not None else None
            if alba_backend is not None:
                disk_alba_backend_guid = alba_backend.guid
    disk['status'] = disk_status
    disk['status_detail'] = disk_status_detail
    disk['alba_backend_guid'] = disk_alba_backend_guid
    _disks.append(disk)
def run_event_consumer():
    """
    Check whether to run the event consumer

    :return: True when a configured message queue endpoint starts with this host's IP, False otherwise
    """
    machine_id = System.get_my_machine_id()
    my_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
    matching = (endpoint
                for endpoint in EtcdConfiguration.get('/ovs/framework/messagequeue|endpoints')
                if endpoint.startswith(my_ip))
    # Stop at the first endpoint that matches
    for _ in matching:
        return True
    return False
def _configure_arakoon_to_volumedriver():
    """
    Points the storagedriver configuration of every existing vPool to the 'voldrv' arakoon cluster

    On every Storage Router, each '<vpool>.json' file in the storagedriver configuration
    directory is loaded, its volume registry and distributed lock store are configured with
    the nodes of the 'voldrv' cluster, and it is saved with reload_config=True.

    :return: None
    """
    print("Update existing vPools")
    logger.info("Update existing vPools")
    for storagerouter in StorageRouterList.get_storagerouters():
        config = ArakoonClusterConfig("voldrv")
        config.load_config()
        arakoon_nodes = [{"host": node.ip, "port": node.client_port, "node_id": node.name}
                         for node in config.nodes]
        with Remote(storagerouter.ip, [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration], "ovs") as remote:
            cfg_dir = EtcdConfiguration.get("/ovs/framework/paths|cfgdir")
            configuration_dir = "{0}/storagedriver/storagedriver".format(cfg_dir)
            if not remote.os.path.exists(configuration_dir):
                remote.os.makedirs(configuration_dir)
            for json_file in remote.os.listdir(configuration_dir):
                if not json_file.endswith(".json"):
                    continue
                vpool_name = json_file.replace(".json", "")
                if remote.os.path.exists("{0}/{1}.cfg".format(configuration_dir, vpool_name)):
                    continue  # There's also a .cfg file, so this is an alba_proxy configuration file

                storagedriver_config = remote.StorageDriverConfiguration("storagedriver", vpool_name)
                storagedriver_config.load()
                storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id="voldrv",
                                                               vregistry_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.configure_distributed_lock_store(dls_type="Arakoon",
                                                                      dls_arakoon_cluster_id="voldrv",
                                                                      dls_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.save(reload_config=True)
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one

    Migrators are the classes directly deriving from 'object' that are defined in the
    python files of the 'migration' directory next to this file. The versions are read from
    and written back to '/ovs/framework/versions'.

    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    if EtcdConfiguration.exists('/ovs/framework/versions'):
        data = EtcdConfiguration.get('/ovs/framework/versions')
    else:
        data = {}

    migrators = []
    path = os.path.join(os.path.dirname(__file__), 'migration')
    for filename in os.listdir(path):
        file_path = os.path.join(path, filename)
        if not (os.path.isfile(file_path) and filename.endswith('.py')):
            continue
        name = filename.replace('.py', '')
        module = imp.load_source(name, os.path.join(path, filename))
        for _, member in inspect.getmembers(module):
            # Only classes defined in the module itself that directly derive from 'object'
            if inspect.isclass(member) and member.__module__ == name and 'object' in [base.__name__ for base in member.__bases__]:
                migrators.append((member.identifier, member.migrate))

    end_version = 0
    for identifier, method in migrators:
        base_version = data[identifier] if identifier in data else 0
        version = method(base_version, master_ips, extra_ips)
        end_version = max(end_version, version)
        # The highest version seen so far is what gets stored for this migrator
        data[identifier] = end_version

    EtcdConfiguration.set('/ovs/framework/versions', data)
def _get_free_ports(client):
    """
    Loads free ports for an arakoon node, skipping the ports of clusters already on this node

    :param client: Client passed on to System.get_my_machine_id and System.get_free_ports
    :return: The ports returned by System.get_free_ports
    """
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    cluster_names = []
    if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
        cluster_names = EtcdConfiguration.list(ArakoonInstaller.ETCD_CONFIG_ROOT)
    for cluster_name in cluster_names:
        try:
            config = ArakoonClusterConfig(cluster_name)
            config.load_config()
            for node in config.nodes:
                if node.name != node_name:
                    continue
                clusters.append(cluster_name)
                exclude_ports.extend([node.client_port, node.messaging_port])
        except:
            # A cluster that cannot be loaded is only reported, it does not abort the lookup
            logger.error(' Could not load port information of cluster {0}'.format(cluster_name))

    port_range = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name))
    ports = System.get_free_ports(port_range, exclude_ports, 2, client)
    logger.debug(' Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports
def _configure_arakoon_to_volumedriver(offline_node_ips=None):
    """
    Points the storagedriver configuration of every existing vPool to the 'voldrv' arakoon cluster

    :param offline_node_ips: IP addresses of offline nodes
                             NOTE(review): normalized to a list but not used any further
                             in this function - confirm whether offline nodes should be skipped
    :return: None
    """
    print('Update existing vPools')
    logger.info('Update existing vPools')
    if offline_node_ips is None:
        offline_node_ips = []

    remote_modules = [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration]
    for storagerouter in StorageRouterList.get_storagerouters():
        config = ArakoonClusterConfig('voldrv')
        config.load_config()
        arakoon_nodes = []
        for arakoon_node in config.nodes:
            arakoon_nodes.append({'host': arakoon_node.ip,
                                  'port': arakoon_node.client_port,
                                  'node_id': arakoon_node.name})
        with Remote(storagerouter.ip, remote_modules, 'ovs') as remote:
            configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
            if not remote.os.path.exists(configuration_dir):
                remote.os.makedirs(configuration_dir)
            for json_file in remote.os.listdir(configuration_dir):
                vpool_name = json_file.replace('.json', '')
                if not json_file.endswith('.json'):
                    continue
                cfg_file = '{0}/{1}.cfg'.format(configuration_dir, vpool_name)
                if remote.os.path.exists(cfg_file):
                    continue  # There's also a .cfg file, so this is an alba_proxy configuration file

                sd_config = remote.StorageDriverConfiguration('storagedriver', vpool_name)
                sd_config.load()
                sd_config.configure_volume_registry(vregistry_arakoon_cluster_id='voldrv',
                                                    vregistry_arakoon_cluster_nodes=arakoon_nodes)
                sd_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                           dls_arakoon_cluster_id='voldrv',
                                                           dls_arakoon_cluster_nodes=arakoon_nodes)
                sd_config.save(reload_config=True)
def load_metadata(self):
    """
    Reads the metadata for an arakoon cluster from reality

    Nothing is loaded when the metadata key does not exist. The keys 'in_use', 'internal'
    and 'type' are validated one after the other and stored on in_use, internal and
    cluster_type respectively.

    :raises ValueError: When the metadata is not a dictionary, a required key is missing or a value is invalid
    :return: None
    """
    metadata_key = ArakoonClusterMetadata.ETCD_METADATA_KEY.format(self.cluster_id)
    if not EtcdConfiguration.exists(metadata_key):
        return

    metadata = EtcdConfiguration.get(metadata_key)
    if not isinstance(metadata, dict):
        raise ValueError('Metadata should be a dictionary')

    bool_type_errors = {'in_use': '"in_use" should be of type "bool"',
                        'internal': '"internal" should be of type "bool"'}
    for field in ['in_use', 'internal', 'type']:
        if field not in metadata:
            raise ValueError('Not all required metadata keys are present for arakoon cluster {0}'.format(self.cluster_id))
        value = metadata[field]
        if field in bool_type_errors:
            if not isinstance(value, bool):
                raise ValueError(bool_type_errors[field])
            # The boolean fields are stored under an attribute with the same name
            setattr(self, field, value)
            continue
        if value not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError('Unsupported arakoon cluster type {0} found\nPlease choose from {1}'.format(value, ', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))
        self.cluster_type = value
def load_config(self):
    """
    Reads a configuration from reality

    The raw cluster configuration is parsed as an ini file. The 'plugins' option of the
    'global' section is stored as a list, every other global option except 'cluster_id'
    and 'cluster' is kept as extra global, and one ArakoonNodeConfig is created for each
    node listed in the 'cluster' option.
    """
    config_key = ArakoonClusterConfig.ETCD_CONFIG_KEY.format(self.cluster_id)
    contents = EtcdConfiguration.get(config_key, raw=True)
    parser = RawConfigParser()
    parser.readfp(StringIO(contents))

    self.nodes = []
    self._extra_globals = {}
    for option in parser.options('global'):
        if option in ['cluster_id', 'cluster']:
            continue  # Ignore these
        if option == 'plugins':
            raw_plugins = parser.get('global', 'plugins')
            self._plugins = [plugin.strip() for plugin in raw_plugins.split(',')]
        else:
            self._extra_globals[option] = parser.get('global', option)

    for raw_name in parser.get('global', 'cluster').split(','):
        node_name = raw_name.strip()
        node_config = ArakoonNodeConfig(name=node_name,
                                        ip=parser.get(node_name, 'ip'),
                                        client_port=parser.get(node_name, 'client_port'),
                                        messaging_port=parser.get(node_name, 'messaging_port'),
                                        log_dir=parser.get(node_name, 'log_dir'),
                                        home=parser.get(node_name, 'home'),
                                        tlog_dir=parser.get(node_name, 'tlog_dir'))
        self.nodes.append(node_config)
def load_config(self):
    """
    Reads a configuration from reality

    Parses the raw cluster configuration as an ini file, stores the 'plugins' of the
    'global' section as a list, keeps the remaining global options (except 'cluster_id'
    and 'cluster') as extra globals and builds an ArakoonNodeConfig, including its log
    sinks, for each node listed in the 'cluster' option.
    """
    contents = EtcdConfiguration.get(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(self.cluster_id), raw=True)
    parser = RawConfigParser()
    parser.readfp(StringIO(contents))

    self.nodes = []
    self._extra_globals = {}
    for option in parser.options('global'):
        if option == 'plugins':
            plugins = parser.get('global', 'plugins').split(',')
            self._plugins = [plugin.strip() for plugin in plugins]
        elif option not in ['cluster_id', 'cluster']:
            # 'cluster_id' and 'cluster' are ignored
            self._extra_globals[option] = parser.get('global', option)

    node_names = [entry.strip() for entry in parser.get('global', 'cluster').split(',')]
    for node_name in node_names:
        self.nodes.append(ArakoonNodeConfig(name=node_name,
                                            ip=parser.get(node_name, 'ip'),
                                            client_port=parser.get(node_name, 'client_port'),
                                            messaging_port=parser.get(node_name, 'messaging_port'),
                                            log_sinks=parser.get(node_name, 'log_sinks'),
                                            crash_log_sinks=parser.get(node_name, 'crash_log_sinks'),
                                            home=parser.get(node_name, 'home'),
                                            tlog_dir=parser.get(node_name, 'tlog_dir')))
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one

    Every python file in the 'migration' directory next to this file is loaded; classes
    defined in it that directly derive from 'object' are used as migrators. The resulting
    versions are stored in '/ovs/framework/versions'.

    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    versions_key = '/ovs/framework/versions'
    data = EtcdConfiguration.get(versions_key) if EtcdConfiguration.exists(versions_key) else {}

    migrators = []
    path = os.path.join(os.path.dirname(__file__), 'migration')
    for filename in os.listdir(path):
        if os.path.isfile(os.path.join(path, filename)) and filename.endswith('.py'):
            name = filename.replace('.py', '')
            module = imp.load_source(name, os.path.join(path, filename))
            for member in inspect.getmembers(module):
                candidate = member[1]
                if not inspect.isclass(candidate) or candidate.__module__ != name:
                    continue
                base_names = [base.__name__ for base in candidate.__bases__]
                if 'object' in base_names:
                    migrators.append((candidate.identifier, candidate.migrate))

    end_version = 0
    for identifier, method in migrators:
        if identifier in data:
            base_version = data[identifier]
        else:
            base_version = 0
        version = method(base_version, master_ips, extra_ips)
        if version > end_version:
            end_version = version
        # Stores the highest version reached up to and including this migrator
        data[identifier] = end_version

    EtcdConfiguration.set('/ovs/framework/versions', data)
def collapse_arakoon():
    """
    Collapse Arakoon's Tlogs

    Every node of every internal Arakoon, namespace manager and ALBA manager cluster is
    collapsed. A failure on one node is logged and does not stop the other nodes.

    :return: None
    """
    log = ScheduledTaskController._logger
    log.info('Starting arakoon collapse')

    arakoon_clusters = [service.name.replace('arakoon-', '')
                        for service in ServiceList.get_services()
                        if service.is_internal is True and
                        service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                              ServiceType.SERVICE_TYPES.NS_MGR,
                                              ServiceType.SERVICE_TYPES.ALBA_MGR)]

    for cluster in arakoon_clusters:
        log.info(' Collapsing cluster {0}'.format(cluster))
        contents = EtcdConfiguration.get(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster), raw=True)
        parser = RawConfigParser()
        parser.readfp(StringIO(contents))

        nodes = {}
        for raw_name in parser.get('global', 'cluster').split(','):
            node_name = raw_name.strip()
            ip = str(parser.get(node_name, 'ip'))
            client_port = int(parser.get(node_name, 'client_port'))
            nodes[node_name] = ([ip], client_port)

        config = ArakoonClientConfig(str(cluster), nodes)
        for node_name in nodes:
            log.info(' Collapsing node: {0}'.format(node_name))
            client = ArakoonAdmin(config)
            try:
                client.collapse(str(node_name), 2)
            except:
                log.exception('Error during collapsing cluster {0} node {1}'.format(cluster, node_name))

    log.info('Arakoon collapse finished')
def update_status(storagedriver_id):
    """
    Sets Storage Driver offline in case hypervisor management Center reports the hypervisor pmachine related
    to this Storage Driver as unavailable.

    Without a management center, the host is considered halted as soon as connecting over
    SSH or querying the LocalStorageRouterClient fails; the Storage Driver is then marked
    offline.

    :param storagedriver_id: ID of the storagedriver to update its status
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    storagerouter = storagedriver.storagerouter

    if pmachine.mgmtcenter:
        # Update status
        pmachine.invalidate_dynamics(['host_status'])
        return

    # No management Center, cannot update status via api
    logger.info('Updating status of pmachine {0} using SSHClient'.format(pmachine.name))
    host_status = 'RUNNING'
    try:
        client = SSHClient(storagerouter, username='******')
        configuration_dir = EtcdConfiguration.get('/ovs/framework/paths|cfgdir')
        logger.info('SSHClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
        config_path = '{0}/storagedriver/storagedriver/{1}.json'.format(configuration_dir, storagedriver.vpool.name)
        with Remote(client.ip, [LocalStorageRouterClient]) as remote:
            lsrc = remote.LocalStorageRouterClient(config_path)
            lsrc.server_revision()
        logger.info('LocalStorageRouterClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
    except Exception as ex:
        logger.error('Connectivity check failed, assuming host {0} is halted. {1}'.format(pmachine.name, ex))
        host_status = 'HALTED'

    if host_status == 'RUNNING':
        return
    # Host is stopped
    storagedriver_client = StorageDriverClient.load(storagedriver.vpool)
    storagedriver_client.mark_node_offline(str(storagedriver.storagedriver_id))
def register(name, email, company, phone, newsletter):
    """
    Registers the environment

    A single heartbeat run is executed first, after which the registration is posted to
    monitoring.openvstorage.com. When a task ID is returned, that task is waited for
    (up to 120 seconds). '/ovs/framework/registered' is set to True afterwards.

    :param name: Name to register
    :param email: E-mail address to register
    :param company: Company to register
    :param phone: Phone number to register
    :param newsletter: Newsletter preference to register
    """
    SupportAgent().run()  # Execute a single heartbeat run
    registration = {'cluster_id': EtcdConfiguration.get('/ovs/framework/cluster_id'),
                    'name': name,
                    'email': email,
                    'company': company,
                    'phone': phone,
                    'newsletter': newsletter,
                    'register_only': True}
    client = OVSClient('monitoring.openvstorage.com', 443, credentials=None, verify=True, version=1)
    task_id = client.post('/support/register/', data=registration)
    if task_id:
        client.wait_for_task(task_id, timeout=120)
    EtcdConfiguration.set('/ovs/framework/registered', True)
def load_target_definition(source, allow_override=False):
    """
    Loads the definition of the logging target for a given source

    The target is read from '/ovs/framework/logging'. When that configuration cannot be
    loaded, or is not a dictionary, logging falls back to the console.

    :param source: Source of the log messages; used to format the redis queue name and to
                   load the path of the log file
    :param allow_override: When True, the OVS_LOGTYPE_OVERRIDE environment variable
                           overrules the configured target type
    :return: Dictionary with a 'type' of 'redis' (plus 'queue', 'host' and 'port'),
             'file' (plus 'filename') or 'console'
    """
    logging_target = {'type': 'console'}
    try:
        from ovs.extensions.db.etcd.configuration import EtcdConfiguration
        logging_target = EtcdConfiguration.get('/ovs/framework/logging')
    except Exception:
        # Best effort: logging keeps working when the configuration is unavailable.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        pass
    if not isinstance(logging_target, dict):
        # Guard against an unexpected configuration value instead of crashing on '.get'
        logging_target = {'type': 'console'}

    target_type = logging_target.get('type', 'console')
    if allow_override is True and 'OVS_LOGTYPE_OVERRIDE' in os.environ:
        target_type = os.environ['OVS_LOGTYPE_OVERRIDE']

    if target_type == 'redis':
        queue = logging_target.get('queue', '/ovs/logging')
        if '{0}' in queue:
            queue = queue.format(source)
        return {'type': 'redis',
                'queue': '/{0}'.format(queue.lstrip('/')),  # Exactly one leading slash
                'host': logging_target.get('host', 'localhost'),
                'port': logging_target.get('port', 6379)}
    if target_type == 'file':
        return {'type': 'file', 'filename': LogHandler.load_path(source)}
    return {'type': 'console'}
def collapse_arakoon():
    """
    Collapse Arakoon's Tlogs

    Every node of every cluster that belongs to an 'Arakoon', 'NamespaceManager' or
    'AlbaManager' service is collapsed. A failure on one node is logged and does not stop
    the other nodes.

    :return: None
    """
    logger.info('Starting arakoon collapse')
    collapsible_types = ('Arakoon', 'NamespaceManager', 'AlbaManager')

    arakoon_clusters = {}
    for service in ServiceList.get_services():
        if service.type.name not in collapsible_types:
            continue
        cluster_name = service.name.replace('arakoon-', '')
        arakoon_clusters[cluster_name] = service.storagerouter

    for cluster in arakoon_clusters:
        logger.info(' Collapsing cluster {0}'.format(cluster))
        config_key = ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster)
        parser = RawConfigParser()
        parser.readfp(StringIO(EtcdConfiguration.get(config_key, raw=True)))

        nodes = {}
        for raw_name in parser.get('global', 'cluster').split(','):
            node_name = raw_name.strip()
            nodes[node_name] = ([parser.get(node_name, 'ip')], parser.get(node_name, 'client_port'))

        config = ArakoonClientConfig(str(cluster), nodes)
        for node_name in nodes:
            logger.info(' Collapsing node: {0}'.format(node_name))
            client = ArakoonAdminClient(node_name, config)
            try:
                client.collapse_tlogs(2)
            except:
                logger.exception('Error during collapsing cluster {0} node {1}'.format(cluster, node_name))

    logger.info('Arakoon collapse finished')
def __init__(self, config_type, vpool_name, number=None):
    """
    Initializes the class

    A 'configure_<section>' method is generated for every section of the parameters of the
    given configuration type; each of them forwards its keyword arguments to self._add.

    :param config_type: Type of configuration: 'storagedriver' or 'metadataserver'
    :param vpool_name: Name of the vPool the configuration belongs to
    :param number: Optional number that is appended to the name of the configuration file
    :raises RuntimeError: When config_type is not one of the allowed types
    """
    def make_configure(sct):
        """
        section closure
        :param sct: Section to create configure function for
        """
        def configure(**kwargs):
            return self._add(sct, **kwargs)
        return configure

    if config_type not in ['storagedriver', 'metadataserver']:
        raise RuntimeError('Invalid configuration type. Allowed: storagedriver, metadataserver')

    self.config_type = config_type
    self.vpool_name = vpool_name
    self.number = number
    self.configuration = {}
    self.is_new = True
    self.dirty_entries = []
    # Work on a private copy: never use parameters directly
    self.params = copy.deepcopy(StorageDriverConfiguration.parameters)

    cfg_dir = EtcdConfiguration.get('/ovs/framework/paths|cfgdir')
    self.base_path = '{0}/storagedriver/{1}'.format(cfg_dir, self.config_type)
    if self.number is not None:
        self.path = '{0}/{1}_{2}.json'.format(self.base_path, self.vpool_name, self.number)
    else:
        self.path = '{0}/{1}.json'.format(self.base_path, self.vpool_name)

    # Fix some manual "I know what I'm doing" overrides
    backend_connection_manager = 'backend_connection_manager'
    optional_entries = self.params[self.config_type][backend_connection_manager]['optional']
    optional_entries.append('s3_connection_strict_consistency')

    # Generate configure_* methods
    for section in self.params[self.config_type]:
        setattr(self, 'configure_{0}'.format(section), make_configure(section))
def _has_plugin(self):
    """
    Checks whether this BackendType has a plugin installed

    :return: True when the code of this BackendType is listed in
             '/ovs/framework/plugins/installed|backends', False when it is not listed or
             when the list cannot be loaded
    """
    try:
        return self.code in EtcdConfiguration.get('/ovs/framework/plugins/installed|backends')
    except Exception:
        # Best effort: any lookup failure means "no plugin". KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return False
def get_mds_storagedriver_config_set(vpool, check_online=False):
    """
    Builds a configuration for all StorageRouters from a given VPool with following goals:
    * Primary MDS is the local one
    * All slaves are on different hosts
    * Maximum `mds.safety` nodes are returned
    The configuration returned is the default configuration used by the volumedriver of which in normal use-cases
    only the 1st entry is used, because at volume creation time, the volumedriver needs to create 1 master MDS
    During ensure_safety, we actually create/set the MDS slaves for each volume

    :param vpool: vPool to get storagedriver configuration for
    :param check_online: Check whether the storage routers are actually responsive
    :return: Dictionary mapping the guid of each StorageRouter to a list of
             {'host': ..., 'port': ...} dictionaries, the local MDS first
    """
    def _append_slaves(storagerouter, failure_domain, configs):
        """
        Appends the MDS info of other StorageRouters that are part of the primary_storagerouters
        of the given failure domain, in ascending order of load, until safety is reached
        """
        for load in sorted(mds_per_load):
            if len(configs) >= safety:
                break
            candidates = mds_per_load[load]
            random.shuffle(candidates)
            for candidate in candidates:
                if len(configs) >= safety:
                    break
                if candidate != storagerouter and candidate in failure_domain.primary_storagerouters:
                    configs.append(mds_per_storagerouter[candidate])

    mds_per_storagerouter = {}
    mds_per_load = {}
    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        if check_online is True:
            try:
                client = SSHClient(storagerouter)
                client.run('pwd')
            except UnableToConnectException:
                # Unreachable StorageRouters are left out of the configuration
                continue
        mds_service, load = MDSServiceController.get_preferred_mds(storagerouter, vpool, include_load=True)
        mds_per_storagerouter[storagerouter] = {'host': storagerouter.ip, 'port': mds_service.service.ports[0]}
        mds_per_load.setdefault(load, []).append(storagerouter)

    safety = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_safety')
    config_set = {}
    for storagerouter, ip_info in mds_per_storagerouter.iteritems():
        primary_failure_domain = storagerouter.primary_failure_domain
        secondary_failure_domain = storagerouter.secondary_failure_domain
        configs = [ip_info]
        config_set[storagerouter.guid] = configs
        _append_slaves(storagerouter, primary_failure_domain, configs)
        if secondary_failure_domain is not None:
            _append_slaves(storagerouter, secondary_failure_domain, configs)
    return config_set
def _has_plugin(self):
    """
    Checks whether this BackendType has a plugin installed

    :return: True when the code of this BackendType is part of
             '/ovs/framework/plugins/installed|backends'; False when it is not, or when
             that configuration cannot be loaded
    """
    try:
        installed_backends = EtcdConfiguration.get('/ovs/framework/plugins/installed|backends')
        return self.code in installed_backends
    except Exception:
        # Best effort: a failing lookup is reported as "not installed", while
        # KeyboardInterrupt and SystemExit are no longer swallowed
        return False
def gather_facts():
    """
    Gather facts from a node

    The 'general' facts are always present. The 'ovs' facts are only added when
    Open vStorage is both installed and configured.

    :returns dictionary with information about Open vStorage on the target node
    :rtype dict
    """
    facts = {}

    # fetch present information from 'ovs setup'
    setup_information = {
        'ovs_installed': ovs_present,
        'ovs_setup_completed': ovs_configured,
        'alba_installed': asdmanager_present,
        'alba_setup_completed': asdmanager_configured
    }
    facts.update({'general': setup_information})

    # fetch ovs information if ovs is installed and configured
    if ovs_present and ovs_configured:
        # pre-fetch data; the context manager closes the file even when reading fails
        with open('/etc/openvstorage_id', 'r') as openvstorage_id:
            node_id = openvstorage_id.read().strip()
        support = EtcdConfiguration.get('/ovs/framework/support')
        grid_ip = str(EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(node_id)))

        ovs_cluster_information = {
            'cluster_id': str(EtcdConfiguration.get('/ovs/framework/cluster_id')),
            'node_id': str(node_id),
            'grid_ip': grid_ip,
            'node_type': str(EtcdConfiguration.get('/ovs/framework/hosts/{0}/type'.format(node_id))),
            'base_dir': str(EtcdConfiguration.get('/ovs/framework/paths').get('basedir')),
            'heartbeat_enabled': str(support.get('enabled')),
            'remote_support_enabled': str(support.get('enablesupport')),
            'etcd_proxy': '{0}=http://{1}:2380'.format(node_id, grid_ip),
            'partition_config': System.get_my_storagerouter().partition_config
        }
        facts.update({'ovs': ovs_cluster_information})

    return facts
def list(self, discover=False, ip=None, node_id=None):
    """
    Lists all available ALBA Nodes

    :param discover: If True and IP provided, return list of single ALBA node, If True and no IP provided,
                     return all ALBA nodes else return modeled ALBA nodes
    :param ip: IP of ALBA node to retrieve
    :param node_id: ID of the ALBA node
    :raises RuntimeError: On an invalid combination of arguments, on invalid credentials or
                          when the node reports another node ID
    :return: The modeled ALBA nodes, or a DataList holding the discovered (volatile) nodes
    """
    def _build_data_list(guids, objects):
        """
        Builds an AlbaNode DataList that is pre-filled with the given nodes
        """
        data_list = DataList(AlbaNode, {})
        data_list._executed = True
        data_list._guids = guids
        data_list._objects = objects
        data_list._data = dict([(entry.guid, {'guid': entry.guid, 'data': entry._data}) for entry in objects.values()])
        return data_list

    if discover is False and (ip is not None or node_id is not None):
        raise RuntimeError('Discover is mutually exclusive with IP and nodeID')
    if (ip is None) != (node_id is None):
        raise RuntimeError('Both IP and nodeID need to be specified')

    if discover is False:
        return AlbaNodeList.get_albanodes()

    if ip is not None:
        # Discover a single node and verify it is the node that was asked for
        node = AlbaNode(volatile=True)
        node.ip = ip
        node.type = 'ASD'
        node.node_id = node_id
        node.port = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(node_id))
        node.username = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(node_id))
        node.password = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(node_id))
        data = node.client.get_metadata()
        if data['_success'] is False and data['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        if data['node_id'] != node_id:
            raise RuntimeError('Unexpected node identifier. {0} vs {1}'.format(data['node_id'], node_id))
        return _build_data_list([node.guid], {node.guid: node})

    # Discover all registered nodes that are not modeled yet
    nodes = {}
    model_node_ids = [node.node_id for node in AlbaNodeList.get_albanodes()]
    found_node_ids = []
    asd_node_ids = []
    if EtcdConfiguration.dir_exists('/ovs/alba/asdnodes'):
        asd_node_ids = EtcdConfiguration.list('/ovs/alba/asdnodes')

    for node_id in asd_node_ids:
        node = AlbaNode(volatile=True)
        node.type = 'ASD'
        node.node_id = node_id
        node.ip = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|ip'.format(node_id))
        node.port = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(node_id))
        node.username = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(node_id))
        node.password = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(node_id))
        already_known = node.node_id in model_node_ids or node.node_id in found_node_ids
        if not already_known:
            nodes[node.guid] = node
            found_node_ids.append(node.node_id)

    return _build_data_list(nodes.keys(), nodes)
def __init__(self):
    """
    Initializes the client

    Loads the support settings and detects the service manager ('upstart' or 'systemd')
    from the name of the process with PID 1.

    :raises RuntimeError: When no known service manager is detected
    """
    self._enable_support = EtcdConfiguration.get('/ovs/framework/support|enablesupport')
    self.interval = EtcdConfiguration.get('/ovs/framework/support|interval')
    self._url = 'https://monitoring.openvstorage.com/api/support/heartbeat/'
    init_info = check_output('cat /proc/1/comm', shell=True)
    # All service classes used in below code should share the exact same interface!
    if 'init' in init_info:
        version_info = check_output('init --version', shell=True)
        if 'upstart' in version_info:
            self.servicemanager = 'upstart'
        else:
            # The exception used to be created without being raised, which left
            # 'servicemanager' unset
            raise RuntimeError('There was no known service manager detected in /proc/1/comm')
    elif 'systemd' in init_info:
        self.servicemanager = 'systemd'
    else:
        raise RuntimeError('There was no known service manager detected in /proc/1/comm')
def get_path(binary_name):
    """
    Returns the path of a binary on this host

    The path configured for this host is returned when it is set. Otherwise the binary is
    located with 'which' and the result is stored in the configuration.

    :param binary_name: Name of the binary
    :return: Path of the binary, or None when 'which' fails
    """
    config_location = '/ovs/framework/hosts/{0}/paths|{1}'.format(System.get_my_machine_id(), binary_name)
    path = EtcdConfiguration.get(config_location)
    if path:
        return path
    try:
        path = check_output('which {0}'.format(binary_name), shell=True).strip()
        EtcdConfiguration.set(config_location, path)
    except CalledProcessError:
        return None
    return path
def pulse():
    """
    Update the heartbeats for all Storage Routers

    Every Storage Router with a responding celery worker gets a fresh 'celery' heartbeat;
    only the local Storage Router gets a fresh 'process' heartbeat. The ARP cache entry of
    another Storage Router is removed when its 'process' heartbeat is at least
    HeartBeat.ARP_TIMEOUT old.

    :return: None
    """
    logger = LogHandler.get('extensions', name='heartbeat')
    current_time = int(time.time())
    machine_id = System.get_my_machine_id()
    amqp = '{0}://{1}:{2}@{3}//'.format(EtcdConfiguration.get('/ovs/framework/messagequeue|protocol'),
                                        EtcdConfiguration.get('/ovs/framework/messagequeue|user'),
                                        EtcdConfiguration.get('/ovs/framework/messagequeue|password'),
                                        EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id)))
    celery_path = OSManager.get_path('celery')
    # Ping all workers; the perl expression removes ANSI color codes from the output
    worker_states = check_output("{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp),
                                 shell=True)

    for router in StorageRouterList.get_storagerouters():
        if router.heartbeats is None:
            router.heartbeats = {}
        worker_ok = 'celery@{0}: OK'.format(router.name) in worker_states
        if worker_ok:
            router.heartbeats['celery'] = current_time
        is_local = router.machine_id == machine_id
        if is_local:
            router.heartbeats['process'] = current_time
        else:
            try:
                # check timeout of other nodes and clear arp cache
                if router.heartbeats and 'process' in router.heartbeats:
                    heartbeat_age = current_time - router.heartbeats['process']
                    if heartbeat_age >= HeartBeat.ARP_TIMEOUT:
                        check_output("/usr/sbin/arp -d {0}".format(router.name), shell=True)
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')
        router.save()
def get_client(client_type=None):
    """
    Returns a volatile storage client

    The client is created once and cached on VolatileFactory.store; while a cached client
    exists, client_type is not looked at.

    :param client_type: Type of store client ('memcache' or 'default'). When omitted, the
                        type is read from '/ovs/framework/stores|volatile'
    :raises RuntimeError: When no store is available for the requested type
    :return: The volatile store client
    """
    if getattr(VolatileFactory, 'store', None) is None:
        if client_type is None:
            client_type = EtcdConfiguration.get('/ovs/framework/stores|volatile')

        VolatileFactory.store = None
        if client_type == 'memcache':
            from ovs.extensions.storage.volatile.memcachestore import MemcacheStore
            endpoints = EtcdConfiguration.get('/ovs/framework/memcache|endpoints')
            VolatileFactory.store = MemcacheStore(endpoints)
        elif client_type == 'default':
            from ovs.extensions.storage.volatile.dummystore import DummyVolatileStore
            VolatileFactory.store = DummyVolatileStore()

    store = VolatileFactory.store
    if store is None:
        raise RuntimeError('Invalid client_type specified')
    return store
def ovs_3977_maintenance_agent_test():
    """
    Test maintenance agent processes

    Verifies that the number of deployed maintenance agents matches the configured number, both for
    the currently configured value and after setting the value to 0 and to 10, and that the agents
    are evenly spread over the ALBA nodes.
    """
    mismatch_message = 'Actual {0} and requested {1} nr of agents does not match'

    def _get_agent_distribution(agent_name):
        """
        Count the maintenance agent init files on every ALBA node
        :param agent_name: Name used in the agent's init file name
        :return: Dict with the amount of agents per node IP and the sum under key 'total'
        """
        result = {}
        total = 0
        for ip in alba_node_ips:
            count = General.execute_command_on_node(ip, 'ls /etc/init/alba-maintenance_{0}-* | wc -l'.format(agent_name))
            count = int(count) if count else 0
            total += count
            result[ip] = count
        result['total'] = total

        print('Maintenance agent distribution: {0}'.format(result))
        for ip in alba_node_ips:
            # Every node should run either the floored average or one agent more
            average = total // len(alba_node_ips)
            assert result[ip] == average or result[ip] == average + 1, "Agents not equally distributed!"

        return result

    backend = GeneralBackend.get_by_name(TestALBA.backend_name)
    if backend is None:
        backend = GeneralAlba.add_alba_backend(TestALBA.backend_name).backend
    name = backend.alba_backend.name

    alba_node_ips = [node.ip for node in GeneralAlba.get_alba_nodes()]

    etcd_key = '/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(backend.alba_backend.guid)
    nr_of_agents = EtcdConfiguration.get(etcd_key)
    print('1. - nr of agents: {0}'.format(nr_of_agents))

    actual_nr_of_agents = _get_agent_distribution(name)['total']
    assert nr_of_agents == actual_nr_of_agents, mismatch_message.format(actual_nr_of_agents, nr_of_agents)

    # set nr to zero
    EtcdConfiguration.set(etcd_key, 0)
    GeneralAlba.checkup_maintenance_agents()
    actual_nr_of_agents = _get_agent_distribution(name)['total']
    assert actual_nr_of_agents == 0, mismatch_message.format(actual_nr_of_agents, 0)
    print('2. - nr of agents: {0}'.format(actual_nr_of_agents))

    # set nr to 10
    EtcdConfiguration.set(etcd_key, 10)
    GeneralAlba.checkup_maintenance_agents()
    actual_nr_of_agents = _get_agent_distribution(name)['total']
    assert actual_nr_of_agents == 10, mismatch_message.format(actual_nr_of_agents, 10)
    print('3. - nr of agents: {0}'.format(actual_nr_of_agents))
def json_files_check_test():
    """
    Verify that the 'setupcompleted' configuration of every storage router contains "true"
    """
    incomplete = []
    for storagerouter in GeneralStorageRouter.get_storage_routers():
        etcd_key = '/ovs/framework/hosts/{0}/setupcompleted'.format(storagerouter.machine_id)
        if "true" not in EtcdConfiguration.get(etcd_key, raw=True):
            incomplete.append("Setup not completed for node {0}\n".format(storagerouter.name))

    issues_found = ''.join(incomplete)
    assert issues_found == '', "Found the following issues while checking for the setupcompleted:{0}\n".format(issues_found)
def get_client(client_type=None):
    """
    Returns a volatile storage client

    The client is created once and cached on VolatileFactory.store. While unit tests are running
    (unittest.running_tests is set), the 'dummy' client type is forced.
    :param client_type: Type of store client ('memcache' or 'dummy'); read from the configuration when None
    :return: The volatile store
    :raises RuntimeError: When no store could be created for the client type
    """
    if getattr(VolatileFactory, 'store', None) is None:
        store_type = client_type
        if getattr(unittest, 'running_tests', False):
            store_type = 'dummy'
        if store_type is None:
            store_type = EtcdConfiguration.get('/ovs/framework/stores|volatile')

        VolatileFactory.store = None
        if store_type == 'memcache':
            from ovs.extensions.storage.volatile.memcachestore import MemcacheStore
            endpoints = EtcdConfiguration.get('/ovs/framework/memcache|endpoints')
            VolatileFactory.store = MemcacheStore(endpoints)
        elif store_type == 'dummy':
            from ovs.extensions.storage.volatile.dummystore import DummyVolatileStore
            VolatileFactory.store = DummyVolatileStore()

    store = VolatileFactory.store
    if store is None:
        raise RuntimeError('Invalid client_type specified')
    return store
def load(self):
    """
    Loads the configuration entries of this config type from Etcd

    When the configuration directory does not exist yet, the configuration stays empty and
    is_new is left untouched.
    """
    self.configuration = {}
    root = self.path.format('')
    if not EtcdConfiguration.dir_exists(root):
        self._logger.debug('Could not find config {0}, a new one will be created'.format(root))
    else:
        self.is_new = False
        for key in self.params[self.config_type]:
            entry_path = self.path.format(key)
            if EtcdConfiguration.exists(entry_path):
                raw_entry = EtcdConfiguration.get(entry_path, raw=True)
                self.configuration[key] = json.loads(raw_entry)
    self.dirty_entries = []
def load(self):
    """
    Loads the configuration stored under self.path in Etcd

    When the key does not exist, an empty configuration is used and is_new is left untouched.
    """
    if EtcdConfiguration.exists(self.path):
        raw_config = EtcdConfiguration.get(self.path, raw=True)
        self.is_new = False
    else:
        raw_config = '{}'
        logger.debug('Could not find config {0}, a new one will be created'.format(self.path))
    self.dirty_entries = []
    self.configuration = json.loads(raw_config)
def get_client(client_type=None):
    """
    Returns a persistent storage client

    The client is created once and cached on PersistentFactory.store. While unit tests are running
    (unittest.running_tests is set), the 'dummy' client type is forced.
    :param client_type: Type of store client ('pyrakoon', 'arakoon' or 'dummy'); read from the configuration when None
    :return: The persistent store
    :raises RuntimeError: When no store could be created for the client type
    """
    if getattr(PersistentFactory, 'store', None) is None:
        store_type = client_type
        if getattr(unittest, 'running_tests', False):
            store_type = 'dummy'
        if store_type is None:
            store_type = EtcdConfiguration.get('/ovs/framework/stores|persistent')

        PersistentFactory.store = None
        if store_type in ('pyrakoon', 'arakoon'):
            from ovs.extensions.storage.persistent.pyrakoonstore import PyrakoonStore
            cluster = str(EtcdConfiguration.get('/ovs/framework/arakoon_clusters|ovsdb'))
            PersistentFactory.store = PyrakoonStore(cluster)
        if store_type == 'dummy':
            from ovs.extensions.storage.persistent.dummystore import DummyPersistentStore
            PersistentFactory.store = DummyPersistentStore()

    store = PersistentFactory.store
    if store is None:
        raise RuntimeError('Invalid client_type specified')
    return store
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers

    When the node being demoted runs an internal 'arakoon-voldrv' service, the voldrv arakoon cluster
    is shrunk, the service is stopped and removed (only when the node is reachable) and the
    remaining cluster nodes are restarted.
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node (unused)
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []

    # No SSH connection is attempted towards a node that is known to be offline
    if cluster_ip in offline_node_ips:
        client = None
    else:
        client = SSHClient(cluster_ip, username='******')

    arakoon_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    current_service = None
    remaining_ips = []
    for service in arakoon_type.services:
        # Externally managed arakoon cluster service does not have storage router
        if not (service.name == 'arakoon-voldrv' and service.is_internal is True):
            continue
        if service.storagerouter.ip == cluster_ip:
            current_service = service
        elif service.storagerouter.ip not in offline_node_ips:
            remaining_ips.append(service.storagerouter.ip)

    if current_service is None:
        return

    StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
    cluster_name = str(EtcdConfiguration.get('/ovs/framework/arakoon_clusters|voldrv'))
    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                    cluster_name=cluster_name,
                                    offline_nodes=offline_node_ips)
    service_name = current_service.name
    if client is not None and ServiceManager.has_service(service_name, client=client) is True:
        ServiceManager.stop_service(service_name, client=client)
        ServiceManager.remove_service(service_name, client=client)
    ArakoonInstaller.restart_cluster_remove(cluster_name, remaining_ips)
    current_service.delete()
    StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def get_config(cluster_name):
    """
    Retrieve the configuration for given cluster
    :param cluster_name: Name of the cluster
    :return: RawConfigParser object loaded with the cluster's configuration
    :raises ValueError: When no configuration exists for the given cluster name
    """
    config_key = GeneralArakoon.ETCD_CONFIG_KEY.format(cluster_name)
    if EtcdConfiguration.exists(config_key, raw=True):
        raw_config = EtcdConfiguration.get(config_key, raw=True)
        config_parser = RawConfigParser()
        config_parser.readfp(StringIO(raw_config))
        return config_parser
    raise ValueError('Unknown arakoon cluster_name {0} provided'.format(cluster_name))
def _process_task(task, metadata, servicemanager):
    """
    Processes a task
    :param task: Name of the task: 'OPEN_TUNNEL', 'CLOSE_TUNNEL' or 'UPLOAD_LOGFILES'
    :param metadata: Task data. 'files' (filename -> base64 contents) is read for OPEN_TUNNEL;
                     'filename', 'endpoint', 'user' and 'password' are read for UPLOAD_LOGFILES
    :param servicemanager: 'upstart' uses the 'service' command, any other value uses systemctl
    :raises RuntimeError: When the task is unknown
    :return: None; any error is logged and re-raised
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    try:
        logger.debug('Processing: {0}'.format(task))
        cid = EtcdConfiguration.get('/ovs/framework/cluster_id')
        nid = System.get_my_machine_id()

        if task == 'OPEN_TUNNEL':
            if servicemanager == 'upstart':
                check_output('service openvpn stop', shell=True)
            else:
                check_output('systemctl stop openvpn@ovs_{0}-{1} || true'.format(cid, nid), shell=True)
            check_output('rm -f /etc/openvpn/ovs_*', shell=True)
            for filename, contents in metadata['files'].iteritems():
                with open(filename, 'w') as the_file:
                    the_file.write(base64.b64decode(contents))
            if servicemanager == 'upstart':
                check_output('service openvpn start', shell=True)
            else:
                check_output('systemctl start openvpn@ovs_{0}-{1}'.format(cid, nid), shell=True)
        elif task == 'CLOSE_TUNNEL':
            if servicemanager == 'upstart':
                check_output('service openvpn stop', shell=True)
            else:
                check_output('systemctl stop openvpn@ovs_{0}-{1}'.format(cid, nid), shell=True)
            check_output('rm -f /etc/openvpn/ovs_*', shell=True)
        elif task == 'UPLOAD_LOGFILES':
            # Every value is shell-quoted: the command runs through a shell and special characters
            # (e.g. in the password) would otherwise break or alter the command
            logfile = quote(check_output('ovs collect logs', shell=True).strip())
            upload_file = quote('/tmp/{0}'.format(metadata['filename']))
            url = quote('ftp://{0}'.format(metadata['endpoint']))
            credentials = quote('{0}:{1}'.format(metadata['user'], metadata['password']))
            check_output('mv {0} {1}; curl -T {1} {2} --user {3}; rm -f {0} {1}'.format(logfile, upload_file, url, credentials), shell=True)
        else:
            raise RuntimeError('Unknown task')
    except Exception as ex:
        # NOTE(review): the complete metadata, including the upload password, ends up in the log
        logger.exception('Unexpected error while processing task {0} (data: {1}): {2}'.format(task, json.dumps(metadata), ex))
        raise
def get_heartbeat_data(self):
    """
    Returns heartbeat data
    :return: Dict with the cluster id ('cid'), the machine id ('nid'), the collected 'metadata' and
             the 'errors' raised while collecting that metadata
    """
    data = {'cid': EtcdConfiguration.get('/ovs/framework/cluster_id'),
            'nid': System.get_my_machine_id(),
            'metadata': {},
            'errors': []}

    try:
        # Versions
        data['metadata']['versions'] = PackageManager.get_versions()
    except Exception as ex:
        # A failing collector must not prevent the heartbeat; the error is reported instead
        data['errors'].append(str(ex))

    return data
def __init__(self):
    """
    Initializes the client

    Reads the support settings from the configuration and detects the service manager
    ('upstart' or 'systemd') based on the name of process 1.
    :raises RuntimeError: When no known service manager could be detected
    """
    self._enable_support = EtcdConfiguration.get('/ovs/framework/support|enablesupport')
    self.interval = EtcdConfiguration.get('/ovs/framework/support|interval')
    self._url = 'https://monitoring.openvstorage.com/api/support/heartbeat/'
    init_info = check_output('cat /proc/1/comm', shell=True)
    # All service classes used in below code should share the exact same interface!
    if 'init' in init_info:
        version_info = check_output('init --version', shell=True)
        if 'upstart' in version_info:
            self.servicemanager = 'upstart'
        else:
            # The exception used to be created without being raised, leaving servicemanager unset
            raise RuntimeError('There was no known service manager detected in /proc/1/comm')
    elif 'systemd' in init_info:
        self.servicemanager = 'systemd'
    else:
        raise RuntimeError('There was no known service manager detected in /proc/1/comm')
def __init__(self, cluster):
    """
    Initializes the client
    :param cluster: Name of the arakoon cluster whose configuration is read from Etcd
    """
    raw_config = EtcdConfiguration.get(PyrakoonStore.ETCD_CONFIG_KEY.format(cluster), raw=True)
    config_parser = RawConfigParser()
    config_parser.readfp(StringIO(raw_config))

    # Map every node listed in the 'global' section onto its ([ip], client_port) tuple
    nodes = {}
    node_names = [entry.strip() for entry in config_parser.get('global', 'cluster').split(',')]
    for node_name in node_names:
        node_ips = [config_parser.get(node_name, 'ip')]
        client_port = int(config_parser.get(node_name, 'client_port'))
        nodes[node_name] = (node_ips, client_port)

    self._config = ArakoonClientConfig(cluster, nodes)
    self._client = ArakoonClient(self._config)
    self._identifier = int(round(random.random() * 10000000))
    self._lock = Lock()
    self._batch_size = 100
def get_heartbeat_data(self):
    """
    Returns heartbeat data
    :return: Dict with the cluster id ('cid'), the machine id ('nid'), the collected 'metadata' and
             the 'errors' raised while collecting that metadata
    """
    data = {
        'cid': EtcdConfiguration.get('/ovs/framework/cluster_id'),
        'nid': System.get_my_machine_id(),
        'metadata': {},
        'errors': []
    }

    try:
        # Versions
        data['metadata']['versions'] = PackageManager.get_versions()
    except Exception as ex:
        # A failing collector must not prevent the heartbeat; the error is reported instead
        data['errors'].append(str(ex))

    return data
def load(self):
    """
    Loads the configuration entries of this config type from Etcd

    When the configuration directory does not exist yet, the configuration stays empty and
    is_new is left untouched.
    """
    self.configuration = {}
    config_root = self.path.format('')
    if EtcdConfiguration.dir_exists(config_root):
        self.is_new = False
        # Only the entries that are actually stored end up in the configuration
        for entry_name in self.params[self.config_type]:
            entry_key = self.path.format(entry_name)
            if not EtcdConfiguration.exists(entry_key):
                continue
            self.configuration[entry_name] = json.loads(EtcdConfiguration.get(entry_key, raw=True))
    else:
        self._logger.debug('Could not find config {0}, a new one will be created'.format(config_root))
    self.dirty_entries = []
def register(name, email, company, phone, newsletter):
    """
    Registers the environment

    Runs a single heartbeat, posts the registration to the monitoring service, waits for the
    returned task (if any) and finally flags the environment as registered.
    :param name: Sent as 'name' in the registration request
    :param email: Sent as 'email' in the registration request
    :param company: Sent as 'company' in the registration request
    :param phone: Sent as 'phone' in the registration request
    :param newsletter: Sent as 'newsletter' in the registration request
    """
    # Execute a single heartbeat run
    SupportAgent().run()

    client = OVSClient('monitoring.openvstorage.com', 443, credentials=None, verify=True, version=1)
    registration = {'cluster_id': EtcdConfiguration.get('/ovs/framework/cluster_id'),
                    'name': name,
                    'email': email,
                    'company': company,
                    'phone': phone,
                    'newsletter': newsletter,
                    'register_only': True}
    task_id = client.post('/support/register/', data=registration)
    if task_id:
        client.wait_for_task(task_id, timeout=120)
    EtcdConfiguration.set('/ovs/framework/registered', True)
def update_status(storagedriver_id):
    """
    Sets Storage Driver offline in case hypervisor management Center reports the hypervisor pmachine
    related to this Storage Driver as unavailable.

    Without a management center, connectivity is checked through SSH and a LocalStorageRouterClient;
    when that check fails the node is marked offline.
    :param storagedriver_id: ID of the storagedriver to update its status
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    storagerouter = storagedriver.storagerouter

    if pmachine.mgmtcenter:
        # Update status
        pmachine.invalidate_dynamics(['host_status'])
        return

    # No management Center, cannot update status via api
    logger.info('Updating status of pmachine {0} using SSHClient'.format(pmachine.name))
    host_halted = False
    try:
        client = SSHClient(storagerouter, username='******')
        configuration_dir = EtcdConfiguration.get('/ovs/framework/paths|cfgdir')
        logger.info('SSHClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
        with Remote(client.ip, [LocalStorageRouterClient]) as remote:
            config_file = '{0}/storagedriver/storagedriver/{1}.json'.format(configuration_dir, storagedriver.vpool.name)
            lsrc = remote.LocalStorageRouterClient(config_file)
            lsrc.server_revision()
        logger.info('LocalStorageRouterClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
    except Exception as ex:
        logger.error('Connectivity check failed, assuming host {0} is halted. {1}'.format(pmachine.name, ex))
        host_halted = True

    if host_halted:
        # Host is stopped
        storagedriver_client = StorageDriverClient.load(storagedriver.vpool)
        storagedriver_client.mark_node_offline(str(storagedriver.storagedriver_id))
def get_path(binary_name):
    """
    Retrieve the absolute path for binary

    The path is read from the host's configuration; when it is not configured yet it is looked up
    with 'which' and stored in the configuration.
    :param binary_name: Binary to get path for
    :return: Path, or None when 'which' fails for the binary
    """
    config_location = '/ovs/framework/hosts/{0}/paths|{1}'.format(System.get_my_machine_id(), binary_name)
    if EtcdConfiguration.exists(config_location):
        return EtcdConfiguration.get(config_location)

    try:
        located_path = check_output('which {0}'.format(binary_name), shell=True).strip()
        EtcdConfiguration.set(config_location, located_path)
    except CalledProcessError:
        return None
    return located_path
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one

    Migrators are the classes deriving directly from object that are found in the .py files of the
    'migration' directory next to this file.
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    key = '/ovs/framework/hosts/{0}/versions'.format(System.get_my_machine_id())
    try:
        if EtcdConfiguration.exists(key):
            data = EtcdConfiguration.get(key)
        else:
            data = {}
    except EtcdConnectionFailed:
        import json  # Most likely 2.6 to 2.7 migration
        data = {}
        legacy_config = '/opt/OpenvStorage/config/ovs.json'
        if os.path.exists(legacy_config):
            with open(legacy_config) as config_file:
                data = json.load(config_file).get('core', {}).get('versions', {})

    migrators = []
    migration_dir = '/'.join([os.path.dirname(__file__), 'migration'])
    for filename in os.listdir(migration_dir):
        file_path = '/'.join([migration_dir, filename])
        if not (os.path.isfile(file_path) and filename.endswith('.py')):
            continue
        module_name = filename.replace('.py', '')
        module = imp.load_source(module_name, file_path)
        for member in inspect.getmembers(module):
            candidate = member[1]
            if inspect.isclass(candidate) and candidate.__module__ == module_name \
                    and 'object' in [base.__name__ for base in candidate.__bases__]:
                migrators.append((candidate.identifier, candidate.migrate))

    end_version = 0
    for identifier, method in migrators:
        base_version = data.get(identifier, 0)
        version = method(base_version, master_ips, extra_ips)
        end_version = max(end_version, version)
        # The highest version reached so far is what gets stored for this migrator
        data[identifier] = end_version

    EtcdConfiguration.set(key, data)
def __init__(self, cluster):
    """
    Initializes the client
    :param cluster: Name of the arakoon cluster whose configuration is read from Etcd
    """
    raw_config = EtcdConfiguration.get(PyrakoonStore.ETCD_CONFIG_KEY.format(cluster), raw=True)
    config_parser = RawConfigParser()
    config_parser.readfp(StringIO(raw_config))

    # Map every node listed in the 'global' section onto its ([ip], client_port) tuple
    nodes = {}
    node_names = [entry.strip() for entry in config_parser.get('global', 'cluster').split(',')]
    for node_name in node_names:
        node_ips = [str(config_parser.get(node_name, 'ip'))]
        client_port = int(config_parser.get(node_name, 'client_port'))
        nodes[node_name] = (node_ips, client_port)

    self._config = ArakoonClientConfig(str(cluster), nodes)
    self._client = ArakoonClient(self._config)
    self._identifier = int(round(random.random() * 10000000))
    self._lock = Lock()
    self._batch_size = 500
    self._sequences = {}
def up_and_running(mountpoint, storagedriver_id):
    """
    Volumedriver informs us that the service is completely started. Post-start events can be executed

    Increases the startup counter of the Storage Driver and, on a VMWARE hypervisor running in
    'classic' vmware mode, re-exports the mountpoint over NFS.
    :param mountpoint: Mountpoint to check
    :param storagedriver_id: ID of the storagedriver
    :raises RuntimeError: When no Storage Driver with the given id exists
    """
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if storagedriver is None:
        raise RuntimeError('A Storage Driver with id {0} could not be found.'.format(storagedriver_id))
    storagedriver.startup_counter += 1
    storagedriver.save()

    if storagedriver.storagerouter.pmachine.hvtype != 'VMWARE':
        return

    client = SSHClient(storagedriver.storagerouter)
    mode_key = '/ovs/framework/hosts/{0}/storagedriver|vmware_mode'.format(System.get_my_machine_id(client))
    if EtcdConfiguration.get(mode_key) != 'classic':
        return

    nfs = Nfsexports()
    nfs.unexport(mountpoint)
    nfs.export(mountpoint)
    nfs.trigger_rpc_mountd()
def get_path(binary_name):
    """
    Retrieve the absolute path for binary

    A path that is not configured yet for this host is looked up with 'which' and stored in the
    configuration before it is returned.
    :param binary_name: Binary to get path for
    :return: Path, or None when 'which' fails for the binary
    """
    host_id = System.get_my_machine_id()
    etcd_key = '/ovs/framework/hosts/{0}/paths|{1}'.format(host_id, binary_name)

    already_configured = EtcdConfiguration.exists(etcd_key)
    if already_configured:
        binary_path = EtcdConfiguration.get(etcd_key)
    else:
        try:
            binary_path = check_output('which {0}'.format(binary_name), shell=True).strip()
            EtcdConfiguration.set(etcd_key, binary_path)
        except CalledProcessError:
            binary_path = None
    return binary_path
def collapse_arakoon():
    """
    Collapse Arakoon's Tlogs

    Handles the clusters of all internal ARAKOON, NS_MGR and ALBA_MGR services. A failure on one
    node is logged and does not stop the collapse of the other nodes.
    :return: None
    """
    task_logger = ScheduledTaskController._logger
    task_logger.info('Starting arakoon collapse')

    arakoon_clusters = []
    for service in ServiceList.get_services():
        if service.is_internal is not True:
            continue
        if service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                 ServiceType.SERVICE_TYPES.NS_MGR,
                                 ServiceType.SERVICE_TYPES.ALBA_MGR):
            arakoon_clusters.append(service.name.replace('arakoon-', ''))

    for cluster in arakoon_clusters:
        task_logger.info(' Collapsing cluster {0}'.format(cluster))
        raw_config = EtcdConfiguration.get(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster), raw=True)
        config_parser = RawConfigParser()
        config_parser.readfp(StringIO(raw_config))

        # Map every node of the cluster onto its ([ip], client_port) tuple
        nodes = {}
        for entry in config_parser.get('global', 'cluster').split(','):
            node_name = entry.strip()
            node_ips = [str(config_parser.get(node_name, 'ip'))]
            nodes[node_name] = (node_ips, int(config_parser.get(node_name, 'client_port')))

        config = ArakoonClientConfig(str(cluster), nodes)
        for node_name in nodes.keys():
            task_logger.info(' Collapsing node: {0}'.format(node_name))
            client = ArakoonAdmin(config)
            try:
                client.collapse(str(node_name), 2)
            except:
                task_logger.exception('Error during collapsing cluster {0} node {1}'.format(cluster, node_name))

    task_logger.info('Arakoon collapse finished')
def get_client(client_type=None):
    """
    Returns a persistent storage client

    The client is created once and cached on PersistentFactory.store; later calls return the cached one.
    :param client_type: Type of store client ('pyrakoon', 'arakoon' or 'default'); read from the configuration when None
    :return: The persistent store
    :raises RuntimeError: When no store could be created for the client type
    """
    if getattr(PersistentFactory, 'store', None) is None:
        store_type = client_type
        if store_type is None:
            store_type = EtcdConfiguration.get('/ovs/framework/stores|persistent')

        PersistentFactory.store = None
        if store_type == 'pyrakoon':
            from ovs.extensions.storage.persistent.pyrakoonstore import PyrakoonStore
            PersistentFactory.store = PyrakoonStore('ovsdb')
        elif store_type == 'arakoon':
            from ovs.extensions.storage.persistent.arakoonstore import ArakoonStore
            PersistentFactory.store = ArakoonStore('ovsdb')
        elif store_type == 'default':
            from ovs.extensions.storage.persistent.dummystore import DummyPersistentStore
            PersistentFactory.store = DummyPersistentStore()

    store = PersistentFactory.store
    if store is None:
        raise RuntimeError('Invalid client_type specified')
    return store
def collapse_arakoon():
    """
    Collapse Arakoon's Tlogs

    Handles the clusters of all 'Arakoon', 'NamespaceManager' and 'AlbaManager' services. A failure
    on one node is logged and does not stop the collapse of the other nodes.
    :return: None
    """
    logger.info('Starting arakoon collapse')

    arakoon_clusters = {}
    for service in ServiceList.get_services():
        if service.type.name not in ('Arakoon', 'NamespaceManager', 'AlbaManager'):
            continue
        cluster_name = service.name.replace('arakoon-', '')
        arakoon_clusters[cluster_name] = service.storagerouter

    for cluster, _ in arakoon_clusters.iteritems():
        logger.info(' Collapsing cluster {0}'.format(cluster))
        raw_config = EtcdConfiguration.get(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster), raw=True)
        config_parser = RawConfigParser()
        config_parser.readfp(StringIO(raw_config))

        # Map every node of the cluster onto its ([ip], client_port) tuple
        nodes = {}
        for entry in config_parser.get('global', 'cluster').split(','):
            node_name = entry.strip()
            node_ips = [config_parser.get(node_name, 'ip')]
            nodes[node_name] = (node_ips, config_parser.get(node_name, 'client_port'))

        config = ArakoonClientConfig(str(cluster), nodes)
        for node_name in nodes.keys():
            logger.info(' Collapsing node: {0}'.format(node_name))
            client = ArakoonAdminClient(node_name, config)
            try:
                client.collapse_tlogs(2)
            except:
                logger.exception('Error during collapsing cluster {0} node {1}'.format(cluster, node_name))

    logger.info('Arakoon collapse finished')
def mds_checkup():
    """
    Validates the current MDS setup/configuration and takes actions where required

    Per vPool: removes MDS services without capacity and without vDisks, adds an MDS service on
    reachable StorageRouters that have no MDS with room left, stores the MDS configuration on the
    reachable StorageRouters and finally ensures the MDS safety of every vDisk.
    :raises RuntimeError: When an extra MDS service could not be added
    :raises Exception: When ensuring the safety failed for one or more vDisks
    """
    logger.info('MDS checkup - Started')

    # Build an overview per vPool and per StorageRouter: the SSH client (None when offline) and its MDS services
    mds_dict = {}
    for vpool in VPoolList.get_vpools():
        logger.info('MDS checkup - vPool {0}'.format(vpool.name))
        vpool_info = {}
        mds_dict[vpool] = vpool_info
        for mds_service in vpool.mds_services:
            storagerouter = mds_service.service.storagerouter
            if storagerouter not in vpool_info:
                vpool_info[storagerouter] = {'client': None, 'services': []}
                try:
                    client = SSHClient(storagerouter, username='******')
                    client.run('pwd')
                    vpool_info[storagerouter]['client'] = client
                    logger.info('MDS checkup - vPool {0} - Storage Router {1} - ONLINE'.format(vpool.name, storagerouter.name))
                except UnableToConnectException:
                    logger.info('MDS checkup - vPool {0} - Storage Router {1} - OFFLINE'.format(vpool.name, storagerouter.name))
            vpool_info[storagerouter]['services'].append(mds_service)

    failures = []
    max_load = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_maxload')
    for vpool, storagerouter_info in mds_dict.iteritems():
        # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded
        # If not, create an extra MDS for that StorageRouter
        for storagerouter in storagerouter_info:
            router_entry = storagerouter_info[storagerouter]
            client = router_entry['client']
            mds_services = router_entry['services']

            # Iterate over a copy, as obsolete services are removed from the list while looping
            for mds_service in list(mds_services):
                if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                    logger.info('MDS checkup - Removing mds_service {0} for vPool {1}'.format(mds_service.number, vpool.name))
                    MDSServiceController.remove_mds_service(mds_service, vpool, reconfigure=True, allow_offline=client is None)
                    mds_services.remove(mds_service)

            has_room = False
            for mds_service in mds_services:
                _, load = MDSServiceController.get_mds_load(mds_service)
                if load < max_load:
                    has_room = True
                    break
            logger.info('MDS checkup - vPool {0} - Storage Router {1} - Capacity available: {2}'.format(vpool.name, storagerouter.name, has_room))

            if not has_room and client is not None:
                mds_service = MDSServiceController.prepare_mds_service(storagerouter=storagerouter,
                                                                       vpool=vpool,
                                                                       fresh_only=False,
                                                                       reload_config=True)
                if mds_service is None:
                    raise RuntimeError('Could not add MDS node')
                mds_services.append(mds_service)

        mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool, True)
        for storagerouter in storagerouter_info:
            client = storagerouter_info[storagerouter]['client']
            if client is None:
                logger.info('MDS checkup - vPool {0} - Storage Router {1} - Marked as offline, not setting default MDS configuration'.format(vpool.name, storagerouter.name))
                continue

            storagedriver = [sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid][0]
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.load(client)
            if storagedriver_config.is_new is False:
                logger.info('MDS checkup - vPool {0} - Storage Router {1} - Storing default MDS configuration: {2}'.format(vpool.name, storagerouter.name, mds_config_set[storagerouter.guid]))
                storagedriver_config.clean()  # Clean out obsolete values
                storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=mds_config_set[storagerouter.guid])
                storagedriver_config.save(client)

        # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal.
        logger.info('MDS checkup - vPool {0} - Ensuring safety for all virtual disks'.format(vpool.name))
        for vdisk in vpool.vdisks:
            try:
                MDSServiceController.ensure_safety(vdisk)
            except Exception as ex:
                failures.append('Ensure safety for vDisk {0} with guid {1} failed with error: {2}'.format(vdisk.name, vdisk.guid, ex))

    if failures:
        raise Exception('\n - ' + '\n - '.join(failures))
    logger.info('MDS checkup - Finished')
def get(self, request, *args, **kwargs):
    """
    Fetches metadata

    The response always contains the storagerouter IPs and versions. Plugin, identification and
    authentication metadata are added as they are collected, and 'authentication_state' tells how
    far the validation of the bearer token got.
    :param request: Request whose META['HTTP_AUTHORIZATION'] header is validated
    :return: Tuple of HttpResponse and the metadata dict
    """
    _ = args, kwargs
    data = {'authenticated': False,
            'authentication_state': None,
            'authentication_metadata': {},
            'username': None,
            'userguid': None,
            'roles': [],
            'identification': {},
            'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
            'versions': list(settings.VERSION),
            'plugins': {}}

    def _respond(**overrides):
        """
        Build the response from the data collected so far, with the given keys overruled
        """
        response_data = dict(data)
        response_data.update(overrides)
        return HttpResponse, response_data

    try:
        # Gather plugin metadata
        plugins = {}
        # - Backends. BackendType plugins must set the has_plugin flag on True
        for backend_type in BackendTypeList.get_backend_types():
            if backend_type.has_plugin is not True:
                continue
            if backend_type.code not in plugins:
                plugins[backend_type.code] = []
            plugins[backend_type.code] += ['backend', 'gui']
        # - Generic plugins, as added to the configuration file(s)
        for plugin_name in EtcdConfiguration.get('/ovs/framework/plugins/installed|generic'):
            if plugin_name not in plugins:
                plugins[plugin_name] = []
            plugins[plugin_name] += ['gui']
        data['plugins'] = plugins

        # Fill identification
        data['identification'] = {'cluster_id': EtcdConfiguration.get('/ovs/framework/cluster_id')}

        # Get authentication metadata
        authentication_metadata = {'ip': System.get_my_storagerouter().ip}
        for key in ['mode', 'authorize_uri', 'client_id', 'scope']:
            oauth2_key = '/ovs/framework/webapps|oauth2.{0}'.format(key)
            if EtcdConfiguration.exists(oauth2_key):
                authentication_metadata[key] = EtcdConfiguration.get(oauth2_key)
        data['authentication_metadata'] = authentication_metadata

        # Gather authorization metadata
        if 'HTTP_AUTHORIZATION' not in request.META:
            return _respond(authentication_state='unauthenticated')
        authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
        if authorization_type != 'Bearer':
            return _respond(authentication_state='invalid_authorization_type')
        tokens = BearerTokenList.get_by_access_token(access_token)
        if len(tokens) != 1:
            return _respond(authentication_state='invalid_token')
        token = tokens[0]
        if token.expiration < time.time():
            # Expired tokens are cleaned up together with their role junctions
            for junction in token.roles.itersafe():
                junction.delete()
            token.delete()
            return _respond(authentication_state='token_expired')

        # Gather user metadata
        user = token.client.user
        if not user.is_active:
            return _respond(authentication_state='inactive_user')
        roles = [j.role.code for j in token.roles]

        return _respond(authenticated=True,
                        authentication_state='authenticated',
                        username=user.username,
                        userguid=user.guid,
                        roles=roles,
                        plugins=plugins)
    except Exception as ex:
        MetadataView._logger.exception('Unexpected exception: {0}'.format(ex))
        return _respond(authentication_state='unexpected_exception')
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.
    :param storagerouter: Storagerouter on which MDS service will be created
    :param vpool: The vPool for which the MDS service will be created
    :param fresh_only: If True and an MDS service already exists for this vPool on this StorageRouter,
                       nothing is created and None is returned
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :return: The newly created MDS service, or None when fresh_only prevented its creation
    :raises RuntimeError: When no free port, no DB partition or no configured StorageDriver could be found
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(service.ports)

    mds_port_range = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
    free_ports = System.get_free_ports(selected_range=mds_port_range,
                                       exclude=occupied_ports,
                                       nr=1,
                                       client=client)
    if not free_ports:
        raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

    # 2. Partition check
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))
    storagedriver = storagedrivers[0]

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions
    from ovs.lib.storagedriver import StorageDriverController
    sdp = StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                             'role': DiskPartition.ROLES.DB,
                                                                             'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                             'partition': db_partition,
                                                                             'mds_service': mds_service})

    # CONFIGURATIONS
    # 1. Volumedriver
    mds_nodes = []
    for existing_service in mdsservice_type.services:
        if existing_service.storagerouter_guid != storagerouter.guid:
            continue
        # A dedicated loop variable is used here: reusing 'mds_service' would overwrite the newly
        # created MDS service that has to be returned
        existing_mds_service = existing_service.mds_service
        if existing_mds_service is not None and existing_mds_service.vpool_guid == vpool.guid:
            mds_nodes.append({'host': existing_service.storagerouter.ip,
                              'port': existing_service.ports[0],
                              'db_directory': sdp.path,
                              'scratch_directory': sdp.path})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
    storagedriver_config.load()
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    return mds_service
def migrate(previous_version, master_ips=None, extra_ips=None):
    """
    Migrates from any version to any version, running all migrations required
    Every step with a version number higher than previous_version is executed, in ascending order.
    If previous_version is for example 1 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    A step that fails is logged and does not prevent the following steps from running; the working
    version is advanced regardless of the outcome of the step.
    :param previous_version: The previous version from which to start the migration.
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    :return: The version reached after running the steps (previous_version itself when it is already 4 or higher)
    """
    logger = LogHandler.get('extensions', name='migration')
    working_version = previous_version

    # Version 1 introduced:
    # - Flexible SSD layout
    if working_version < 1:
        try:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
        except Exception:
            logger.exception('Error migrating to version 1')
        working_version = 1

    # Version 2 introduced:
    # - Registration
    if working_version < 2:
        try:
            import time
            from ovs.extensions.generic.configuration import Configuration
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())
        except Exception:
            logger.exception('Error migrating to version 2')
        working_version = 2

    # Version 3 introduced:
    # - New arakoon clients
    if working_version < 3:
        try:
            # The module is reloaded before the class is imported from it, so the class
            # definition comes from the freshly loaded module
            from ovs.extensions.db.arakoon import ArakoonInstaller
            reload(ArakoonInstaller)
            from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.generic.configuration import Configuration
            if master_ips is not None:
                for ip in master_ips:
                    client = SSHClient(ip)
                    if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                        for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            try:
                                ArakoonInstaller.deploy_cluster(cluster_name, ip)
                            except Exception:
                                # Best effort: one failing cluster must not stop the others,
                                # but the failure should at least be visible in the logs
                                logger.exception('Error deploying cluster {0} on {1}'.format(cluster_name, ip))
            if Configuration.exists('ovs.core.storage.persistent'):
                Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
        except Exception:
            logger.exception('Error migrating to version 3')
        working_version = 3

    # Version 4 introduced:
    # - Etcd
    if working_version < 4:
        try:
            import os
            import json
            from ConfigParser import RawConfigParser
            from ovs.extensions.db.etcd import installer
            reload(installer)
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            from ovs.extensions.db.etcd.configuration import EtcdConfiguration
            from ovs.extensions.generic.system import System

            host_id = System.get_my_machine_id()
            etcd_migrate = False
            if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                etcd_migrate = True
            else:
                if master_ips is not None and extra_ips is not None:
                    # Look for a node that already hosts the 'config' cluster
                    cluster_ip = None
                    for ip in master_ips + extra_ips:
                        if EtcdInstaller.has_cluster(ip, 'config'):
                            cluster_ip = ip
                            break
                    node_ip = None
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            node_ip = config['grid']['ip']
                    if node_ip is not None:
                        if cluster_ip is None:
                            # No cluster found on any node: create it on this node
                            EtcdInstaller.create_cluster('config', node_ip)
                            EtcdConfiguration.initialize()
                            EtcdConfiguration.initialize_host(host_id)
                        else:
                            EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                            EtcdConfiguration.initialize_host(host_id)
                        etcd_migrate = True

            if etcd_migrate is True:
                # Migrating configuration files
                path = '/opt/OpenvStorage/config/ovs.json'
                if os.path.exists(path):
                    with open(path) as config_file:
                        config = json.load(config_file)
                        EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                        if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                            EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                        else:
                            # Keep the lowest of the stored and the local install time
                            EtcdConfiguration.set('/ovs/framework/install_time',
                                                  min(EtcdConfiguration.get('/ovs/framework/install_time'),
                                                      config['core']['install_time']))
                        EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                        EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                        EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                        EtcdConfiguration.set('/ovs/framework/paths',
                                              {'cfgdir': config['core']['cfgdir'],
                                               'basedir': config['core']['basedir'],
                                               'ovsdb': config['core']['ovsdb']})
                        EtcdConfiguration.set('/ovs/framework/support',
                                              {'enablesupport': config['support']['enablesupport'],
                                               'enabled': config['support']['enabled'],
                                               'interval': config['support']['interval']})
                        EtcdConfiguration.set('/ovs/framework/storagedriver',
                                              {'mds_safety': config['storagedriver']['mds']['safety'],
                                               'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                               'mds_maxload': config['storagedriver']['mds']['maxload']})
                        EtcdConfiguration.set('/ovs/framework/webapps',
                                              {'html_endpoint': config['webapps']['html_endpoint'],
                                               'oauth2': config['webapps']['oauth2']})
                        EtcdConfiguration.set('/ovs/framework/messagequeue',
                                              {'endpoints': [],
                                               'protocol': config['core']['broker']['protocol'],
                                               'user': config['core']['broker']['login'],
                                               'port': config['core']['broker']['port'],
                                               'password': config['core']['broker']['password'],
                                               'queues': config['core']['broker']['queues']})
                        # '{{0}}' survives the first format as '{0}', leaving a per-host key template
                        host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                        EtcdConfiguration.set(host_key.format('/storagedriver'),
                                              {'rsp': config['storagedriver']['rsp'],
                                               'vmware_mode': config['storagedriver']['vmware_mode']})
                        EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                        EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                        EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                        EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                        EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])

                # Move the endpoint lists of the client config files into Etcd and remove the files
                for cfg_name, etcd_key in (('memcacheclient.cfg', '/ovs/framework/memcache|endpoints'),
                                           ('rabbitmqclient.cfg', '/ovs/framework/messagequeue|endpoints')):
                    path = '{0}/{1}'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'), cfg_name)
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [config.get(node.strip(), 'location').strip()
                                 for node in config.get('main', 'nodes').split(',')]
                        EtcdConfiguration.set(etcd_key, nodes)
                        os.remove(path)

                # Migrate arakoon configuration files
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                from ovs.extensions.generic.sshclient import SSHClient
                if master_ips is not None:
                    config_dir = '/opt/OpenvStorage/config/arakoon/'
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(config_dir):
                            for cluster_name in client.dir_list(config_dir):
                                try:
                                    # NOTE(review): the file is opened on the local filesystem while the
                                    # directory listing comes from the SSH client for `ip` - confirm this
                                    # is intended when `ip` is not the local node
                                    with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                        EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name),
                                                              config_file.read(),
                                                              raw=True)
                                        ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except Exception:
                                    logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                            client.dir_delete(config_dir)
        except Exception:
            logger.exception('Error migrating to version 4')
        working_version = 4

    return working_version
def process(queue, body, mapping):
    """
    Processes the actual received body
    Messages from the storagedriver queue are parsed as a FileSystemEvents.EventMessage and every task
    mapped to an extension present on the event is scheduled. Messages from the 'notifications.info'
    queue are handled as OpenStack notifications (instance and volume renames).
    :param queue: Type of queue to be used
    :param body: Body of the message
    :param mapping: Dictionary keyed by event extension. Every value is a list of dictionaries holding
                    a 'task', its 'arguments' (event field > task keyword argument) and optional 'options'
    :raises NotImplementedError: When the queue is neither the storagedriver queue nor 'notifications.info'
    """
    logger = LogHandler.get('extensions', name='processor')
    if queue == EtcdConfiguration.get('/ovs/framework/messagequeue|queues.storagedriver'):
        cache = VolatileFactory.get_client()
        message = FileSystemEvents.EventMessage()
        message.ParseFromString(body)

        # Possible special tags used as `arguments` key:
        # - [NODE_ID]: Replaced by the storagedriver_id as reported by the event
        # - [CLUSTER_ID]: Replaced by the clusterid as reported by the event
        # Possible dedupe key tags:
        # - [EVENT_NAME]: The name of the event message type
        # - [TASK_NAME]: Task method name
        # - [<argument value>]: Any value of the `arguments` dictionary.

        # str and unicode on Python 2; used to decide whether a value needs converting
        string_types = (str, type(u''))

        logger.info('Got event, processing...')
        event = None
        for extension in mapping:
            if not message.event.HasExtension(extension):
                continue
            event = message.event.Extensions[extension]
            node_id = message.node_id
            cluster_id = message.cluster_id
            for current_map in mapping[extension]:
                task = current_map['task']
                kwargs = {}
                delay = 0
                routing_key = 'generic'
                for field, target in current_map['arguments'].iteritems():
                    if field == '[NODE_ID]':
                        kwargs[target] = node_id
                    elif field == '[CLUSTER_ID]':
                        kwargs[target] = cluster_id
                    else:
                        kwargs[target] = getattr(event, field)
                if 'options' in current_map:
                    options = current_map['options']
                    if options.get('execonstoragerouter', False):
                        storagedriver = StorageDriverList.get_by_storagedriver_id(node_id)
                        if storagedriver is not None:
                            routing_key = 'sr.{0}'.format(storagedriver.storagerouter.machine_id)
                    delay = options.get('delay', 0)
                    dedupe = options.get('dedupe', False)
                    dedupe_key = options.get('dedupe_key', None)
                    if dedupe is True and dedupe_key is not None:  # We can't dedupe without a key
                        key = 'ovs_dedupe_volumedriver_events_{0}'.format(dedupe_key)
                        key = key.replace('[EVENT_NAME]', extension.full_name)
                        key = key.replace('[TASK_NAME]', task.__class__.__name__)
                        for kwarg_key, kwarg_value in kwargs.items():
                            # str.replace only accepts strings; event fields can be of any type
                            if not isinstance(kwarg_value, string_types):
                                kwarg_value = str(kwarg_value)
                            key = key.replace('[{0}]'.format(kwarg_key), kwarg_value)
                        key = key.replace(' ', '_')
                        task_id = cache.get(key)
                        if task_id:
                            # Key exists, task was already scheduled
                            # If task is already running, the revoke message will
                            # be ignored
                            revoke(task_id)
                        _log(task, kwargs, node_id)
                        async_result = task.s(**kwargs).apply_async(countdown=delay, routing_key=routing_key)
                        cache.set(key, async_result.id, 600)  # Store the task id
                        new_task_id = async_result.id
                    else:
                        _log(task, kwargs, node_id)
                        async_result = task.s(**kwargs).apply_async(countdown=delay, routing_key=routing_key)
                        new_task_id = async_result.id
                else:
                    async_result = task.delay(**kwargs)
                    new_task_id = async_result.id
                logger.info('[{0}] {1}({2}) started on {3} with taskid {4}. Delay: {5}s'.format(
                    queue, task.__name__, json.dumps(kwargs), routing_key, new_task_id, delay))

        if event is None:
            # None of the mapped extensions matched: look up the type for logging purposes only
            message_type = 'unknown'
            for extension in _load_extensions():
                if message.event.HasExtension(extension):
                    message_type = extension.full_name
            logger.info('A message with type {0} was received. Skipped.'.format(message_type))
    elif queue == 'notifications.info':
        logger.info('Received notification from openstack...')
        try:
            body = json.loads(body)
            event_type = body['event_type']
            logger.info('Processing notification for event {0}'.format(event_type))
            if event_type == 'compute.instance.update':
                old_display_name = body['payload'].get('old_display_name')
                instance_id = body['payload']['instance_id']
                display_name = body['payload'].get('display_name')
                if old_display_name and old_display_name != display_name:
                    logger.info('Caught instance rename event')
                    VMachineController.update_vmachine_name.apply_async(kwargs={'old_name': old_display_name,
                                                                                'new_name': display_name,
                                                                                'instance_id': instance_id})
            elif event_type == 'volume.update.start':
                # Remember the name seen at the start of the update, to compare with at the end
                volume_id = body['payload']['volume_id']
                display_name = body['payload']['display_name']
                CINDER_VOLUME_UPDATE_CACHE[volume_id] = display_name
            elif event_type == 'volume.update.end':
                volume_id = body['payload']['volume_id']
                display_name = body['payload']['display_name']
                # The update is finished, so the remembered name is consumed whether or not a rename
                # happened; pop avoids both a lingering entry and a KeyError on a missing one
                old_display_name = CINDER_VOLUME_UPDATE_CACHE.pop(volume_id, None)
                if old_display_name and old_display_name != display_name:
                    logger.info('Caught volume rename event')
                    VDiskController.update_vdisk_name.apply_async(kwargs={'volume_id': volume_id,
                                                                          'old_name': old_display_name,
                                                                          'new_name': display_name})
        except Exception as ex:
            logger.error('Processing notification failed {0}'.format(ex))
        logger.info('Processed notification from openstack.')
    else:
        raise NotImplementedError('Queue {0} is not yet implemented'.format(queue))
def get_mds_storagedriver_config_set(vpool, check_online=False):
    """
    Builds the MDS node configuration for every StorageRouter serving a given VPool:
    * The first entry is the preferred MDS of the StorageRouter itself
    * Next entries are taken from other StorageRouters, lowest load first, limited to StorageRouters
      of the primary failure domain and afterwards (if set) of the secondary failure domain
    * At most `mds_safety` entries are returned per StorageRouter

    The configuration returned is the default configuration used by the volumedriver of which in normal
    use-cases only the 1st entry is used, because at volume creation time, the volumedriver needs to
    create 1 master MDS. During ensure_safety, the MDS slaves are actually created/set for each volume
    :param vpool: vPool to get storagedriver configuration for
    :param check_online: Check whether the storage routers are actually responsive
    :return: Dictionary mapping StorageRouter guid to a list of {'host': ..., 'port': ...} dictionaries
    """
    node_info_by_router = {}
    routers_by_load = {}
    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        if check_online is True:
            # Unreachable StorageRouters are left out of the configuration altogether
            try:
                SSHClient(storagerouter).run('pwd')
            except UnableToConnectException:
                continue
        preferred_mds, load = MDSServiceController.get_preferred_mds(storagerouter, vpool, include_load=True)
        node_info_by_router[storagerouter] = {'host': storagerouter.ip,
                                              'port': preferred_mds.service.ports[0]}
        routers_by_load.setdefault(load, []).append(storagerouter)

    safety = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_safety')

    def _append_candidates(owner, nodes, failure_domain):
        """
        Appends the node info of other StorageRouters in the given failure domain to `nodes`,
        walking the loads in ascending order, until `safety` entries are reached
        """
        for load in sorted(routers_by_load):
            if len(nodes) >= safety:
                break
            candidates = routers_by_load[load]
            random.shuffle(candidates)  # Random order among StorageRouters with the same load
            for candidate in candidates:
                if len(nodes) >= safety:
                    break
                if candidate != owner and candidate in failure_domain.primary_storagerouters:
                    nodes.append(node_info_by_router[candidate])

    config_set = {}
    for storagerouter, ip_info in node_info_by_router.iteritems():
        primary_domain = storagerouter.primary_failure_domain
        secondary_domain = storagerouter.secondary_failure_domain
        nodes = [ip_info]
        config_set[storagerouter.guid] = nodes
        _append_candidates(storagerouter, nodes, primary_domain)
        if secondary_domain is not None:
            _append_candidates(storagerouter, nodes, secondary_domain)
    return config_set