def list(self, vpool_guid=None):
    """
    Overview of all StorageDrivers
    :param vpool_guid: Guid of the vPool
    :type vpool_guid: str
    """
    # Without a vPool filter, return every StorageDriver in the model
    if vpool_guid is None:
        return StorageDriverList.get_storagedrivers()
    # Otherwise restrict to the StorageDrivers of the requested vPool
    return VPool(vpool_guid).storagedrivers
def getEdgeconnection(vpoolname=VPOOLNAME):
    """
    Pick a random non-failed StorageDriver and return its edge connection details.
    :param vpoolname: Name of the vPool to match; when None, any vPool whose name
                      differs from the module default VPOOLNAME is accepted
    :return: Tuple of (storage_ip, edge port, protocol); (None, None, protocol) when no match
    """
    protocol = getEdgeProtocol()
    candidates = list(StorageDriverList.get_storagedrivers())
    # Shuffle so repeated calls spread connections over the available StorageDrivers
    random.shuffle(candidates)
    for candidate in candidates:
        if candidate.status == 'FAILURE':
            continue
        matches = (vpoolname is not None and candidate.vpool.name == vpoolname) or \
                  (vpoolname is None and candidate.vpool.name != VPOOLNAME)
        if matches:
            return candidate.storage_ip, candidate.ports['edge'], protocol
    return None, None, protocol
def create(self):
    """
    Prepares a new Storagedriver for a given vPool and Storagerouter
    :return: None
    :rtype: NoneType
    """
    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')

    machine_id = System.get_my_machine_id(client=self.sr_installer.root_client)
    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
    storagerouter = self.sr_installer.storagerouter
    # Port selection is done under a host-wide mutex so concurrent vPool additions
    # on the same machine cannot claim the same free ports
    with volatile_mutex('add_vpool_get_free_ports_{0}'.format(machine_id), wait=30):
        # Collect every port already modelled on this StorageRouter
        # (StorageDriver ports plus one port per ALBA proxy service)
        model_ports_in_use = []
        for sd in StorageDriverList.get_storagedrivers():
            if sd.storagerouter_guid == storagerouter.guid:
                model_ports_in_use += sd.ports.values()
                for proxy in sd.alba_proxies:
                    model_ports_in_use.append(proxy.service.ports[0])
        # 4 fixed StorageDriver ports + 1 per requested ALBA proxy
        ports = System.get_free_ports(selected_range=port_range, exclude=model_ports_in_use, amount=4 + self.sr_installer.requested_proxies, client=self.sr_installer.root_client)

        vpool = self.vp_installer.vpool
        vrouter_id = '{0}{1}'.format(vpool.name, machine_id)
        storagedriver = StorageDriver()
        storagedriver.name = vrouter_id.replace('_', ' ')
        storagedriver.ports = {'management': ports[0],
                               'xmlrpc': ports[1],
                               'dtl': ports[2],
                               'edge': ports[3]}
        storagedriver.vpool = vpool
        storagedriver.cluster_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
        storagedriver.storage_ip = self.storage_ip
        storagedriver.mountpoint = '/mnt/{0}'.format(vpool.name)
        storagedriver.description = storagedriver.name
        storagedriver.storagerouter = storagerouter
        storagedriver.storagedriver_id = vrouter_id
        storagedriver.save()

        # ALBA Proxies: model one Service + AlbaProxy per requested proxy,
        # each claiming one of the remaining free ports
        proxy_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_PROXY)
        for proxy_id in xrange(self.sr_installer.requested_proxies):
            service = Service()
            service.storagerouter = storagerouter
            service.ports = [ports[4 + proxy_id]]
            service.name = 'albaproxy_{0}_{1}'.format(vpool.name, proxy_id)
            service.type = proxy_service_type
            service.save()
            alba_proxy = AlbaProxy()
            alba_proxy.service = service
            alba_proxy.storagedriver = storagedriver
            alba_proxy.save()

    # Expose the freshly modelled StorageDriver to the caller
    self.storagedriver = storagedriver
def get_storagedrivers_in_same_domain(domain_guid):
    """
    Get all StorageDrivers whose StorageRouter is part of the given domain.
    Note: the previous docstring claimed this returned storagerouter guids,
    but the function returns StorageDriver objects.
    :param domain_guid: Guid of a domain
    :type domain_guid: str
    :return: List of StorageDriver objects
    :rtype: list
    """
    return [storagedriver for storagedriver in StorageDriverList.get_storagedrivers()
            if domain_guid in storagedriver.storagerouter.regular_domains]
def delete_snapshots(timestamp=None):
    # type: (float) -> GroupResult
    """
    Delete snapshots based on the retention policy
    Offloads concurrency to celery
    Returns a GroupResult. Waiting for the result can be done using result.get()
    :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used
    :type timestamp: float
    :return: The GroupResult
    :rtype: GroupResult
    """
    # The result cannot be fetched in this task
    group_id = uuid()
    # One sub-task per StorageDriver, all sharing the same group id
    signatures = [GenericController.delete_snapshots_storagedriver.s(sd.guid, timestamp, group_id)
                  for sd in StorageDriverList.get_storagedrivers()]
    return group(signatures).apply_async(task_id=group_id)
def update_vmachine_config(vmachine, vm_object, pmachine=None):
    """
    Update a vMachine configuration with a given vMachine configuration
    :param vmachine: Virtual Machine to update
    :param vm_object: New virtual machine info
    :param pmachine: Physical machine of the virtual machine
    """
    try:
        vdisks_synced = 0
        # Fire created/renamed events before overwriting the modelled name
        if vmachine.name is None:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_created',
                                    'metadata': {'name': vm_object['name']}})
        elif vmachine.name != vm_object['name']:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_renamed',
                                    'metadata': {'old_name': vmachine.name,
                                                 'new_name': vm_object['name']}})
        if pmachine is not None:
            vmachine.pmachine = pmachine
        vmachine.name = vm_object['name']
        vmachine.hypervisor_id = vm_object['id']
        vmachine.devicename = vm_object['backing']['filename']
        vmachine.save()

        # Updating and linking disks
        storagedrivers = StorageDriverList.get_storagedrivers()
        # Map '<storage_ip>:<mountpoint>' (the hypervisor's datastore identifier) to the StorageDriver
        datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers])
        vdisk_guids = []
        mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
        for disk in vm_object['disks']:
            ensure_safety = False
            if disk['datastore'] in vm_object['datastores']:
                datastore = vm_object['datastores'][disk['datastore']]
                if datastore in datastores:
                    try:
                        mutex.acquire(wait=10)
                        vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                        if vdisk is None:
                            # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                            vdisk = VDisk()
                            vdisk.vpool = datastores[datastore].vpool
                            vdisk.reload_client()
                            vdisk.devicename = disk['filename']
                            vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                            vdisk.size = vdisk.info['volume_size']
                            vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                              'cluster_multiplier': vdisk.info['cluster_multiplier']}
                            # Create the disk in a locked context, but don't execute long running-task in same context
                            vdisk.save()
                            ensure_safety = True
                    finally:
                        mutex.release()
                    # Long-running follow-up work runs outside the mutex
                    if ensure_safety:
                        MDSServiceController.ensure_safety(vdisk)
                        VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)

                    # Update the disk with information from the hypervisor
                    if vdisk.vmachine is None:
                        MessageController.fire(MessageController.Type.EVENT,
                                               {'type': 'vdisk_attached',
                                                'metadata': {'vmachine_name': vmachine.name,
                                                             'vdisk_name': disk['name']}})
                    vdisk.vmachine = vmachine
                    vdisk.name = disk['name']
                    vdisk.order = disk['order']
                    vdisk.save()
                    vdisk_guids.append(vdisk.guid)
                    vdisks_synced += 1

        # Any modelled vdisk not seen in the hypervisor object anymore is detached
        for vdisk in vmachine.vdisks:
            if vdisk.guid not in vdisk_guids:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vdisk_detached',
                                        'metadata': {'vmachine_name': vmachine.name,
                                                     'vdisk_name': vdisk.name}})
                vdisk.vmachine = None
                vdisk.save()

        VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
            vmachine.name, vdisks_synced
        ))
    except Exception as ex:
        VMachineController._logger.info('Error during vMachine update: {0}'.format(str(ex)))
        raise
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain:
    * "dangerous" migration code (it needs certain running services)
    * Migration code depending on a cluster-wide state
    * ...
    """
    MigrationController._logger.info('Preparing out of band migrations...')

    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs_extensions.generic.toolbox import ExtensionsToolbox
    from ovs_extensions.services.interfaces.systemd import Systemd
    from ovs.extensions.services.servicefactory import ServiceFactory
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.lib.generic import GenericController

    MigrationController._logger.info('Start out of band migrations...')
    service_manager = ServiceFactory.get_manager()

    # Pre-build one SSH client per StorageRouter; reused by every migration step below
    sr_client_map = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter, username='******')

    #########################################################
    # Addition of 'ExecReload' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                # Skip services whose unit file already carries an ExecReload directive
                if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue
                try:
                    service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    ##################################################################
    # Adjustment of open file descriptors for Arakoon services to 8192
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-arakoon-'):
                continue
            # Unit file location and nofile directive differ between systemd and upstart
            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'LimitNOFILE=8192'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'limit nofile 8192 8192'
            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue
            try:
                service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name)
            except:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename alba_proxy service in model
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()
            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
            ExtensionsToolbox.edit_version_file(client=root_client, package_name='alba', old_service_name=old_service_name, new_service_name=new_service_name)

            # Register new service and remove old service
            service_manager.add_service(name='ovs-albaproxy', client=root_client, params=Configuration.get(old_configuration_key), target_name='ovs-{0}'.format(new_service_name))

            # Update scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                # Accelerated ALBA configured
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                        proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                        Configuration.set(proxy_scrub_config_key, json.dumps(proxy_scrub_config, indent=4), raw=True)

        # Update 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.load()
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            # Convert a single-proxy config into a MULTI config: one numbered sub-section per proxy,
            # ordered by proxy service port
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                # 'backend_interface' settings belong at the top level, not inside the per-proxy sections
                # noinspection PyUnresolvedReferences
                for key, value in backend_connection_manager[str(index)].items():
                    if key.startswith('backend_interface'):
                        backend_connection_manager[key] = value
                        # noinspection PyUnresolvedReferences
                        del backend_connection_manager[str(index)][key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    backend_connection_manager[key] = value
        else:
            # Already MULTI: hoist any 'backend_interface' settings out of the per-proxy sub-sections
            backend_connection_manager = current_config
            for value in backend_connection_manager.values():
                if isinstance(value, dict):
                    for key, val in value.items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    changes = True
                    backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client, package_name='volumedriver', old_service_name='volumedriver_{0}'.format(vpool.name))
            if service_manager.ImplementationClass == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    for vpool in VPoolList.get_vpools():
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                if bits is None:
                    # Read METADATASTORE_BITS once from the actual service file; default to 5 when unparsable
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                       client=sr_client_map[storagedriver.storagerouter_guid],
                                                                       entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except:
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))
        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    MigrationController._logger.info('Finished out of band migrations')
    GenericController.refresh_package_information()
def gather_scrub_work():
    """
    Divide scrub work across all reachable StorageRouters that expose a SCRUB partition.
    Remote StorageRouters each get an async _execute_scrub_work task; the local chunk
    runs in-process, after which the remote results are joined.
    """
    logger.info("Divide scrubbing work among allowed Storage Routers")
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info("Scrub partition found on Storage Router {0}: {1}".format(storage_driver.name, partition.folder))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        # Only use this StorageRouter if it is reachable over SSH
                        _ = SSHClient(storage_driver.storagerouter.ip)
                        scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        logger.warning("StorageRouter {0} is not reachable".format(storage_driver.storagerouter.ip))
    if len(scrub_locations) == 0:
        raise RuntimeError("No scrub locations found")

    # Collect all BASE vdisks without children (clones/snapshot children are excluded)
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
            vdisk_guids.add(vdisk.guid)
    logger.info("Found {0} virtual disks which need to be check for scrub work".format(len(vdisk_guids)))

    local_machineid = System.get_my_machine_id()
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []
    for index, scrub_info in enumerate(scrub_locations.items()):
        # Slice the guid set into (roughly) equal contiguous chunks, one per scrub location
        start_index = index * len(vdisk_guids) / len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info("Executing scrub work on {0} Storage Router {1} for {2} virtual disks".format("local" if local is True else "remote", storage_router.name, len(vdisk_guids_to_scrub)))
        if local is True:
            # Keep the local chunk; it is executed after all remote tasks are launched
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                         vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key="sr.{0}".format(storage_router.machine_id)))
            storage_router_list.append(storage_router)
            logger.info("Launched scrub task on Storage Router {0}".format(storage_router.name))

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub)
    all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    for index, result in enumerate(all_results):
        if result is not None:
            logger.error("Scrubbing failed on Storage Router {0} with error {1}".format(storage_router_list[index].name, result))
def migrate(previous_version):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    @param previous_version: The previous version from which to start the migration.
    """

    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        admin = User()
        admin.username = '******'
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [(admin_group, [read_role, write_role, manage_role]),
                   (viewers_group, [read_role])]
        for setting in mapping:
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends
        for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'), ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Branding
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # Failure Domain
        failure_domain = FailureDomain()
        failure_domain.name = 'Default'
        failure_domain.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    if not filename.startswith('/'):
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)
        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration

        # Model StorageDriverPartitions for existing MDS services based on the old
        # mountpoint_md / mountpoint_temp fields
        for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
            mds_service = service.mds_service
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            tasks = {}
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        # Walk up until an existing directory is found; its st_dev identifies
                        # the partition the directory lives on
                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                            stat_dir = directory
                            while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            inode = rem.os.stat(stat_dir).st_dev
                        if partition.inode == inode:
                            if role not in partition.roles:
                                partition.roles.append(role)
                                partition.save()
                            number = 0
                            migrated = False
                            for sd_partition in storagedriver.partitions:
                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                    if sd_partition.mds_service == mds_service:
                                        migrated = True
                                        break
                                    if sd_partition.partition_guid == partition.guid:
                                        number = max(sd_partition.number, number)
                            if migrated is False:
                                sd_partition = StorageDriverPartition()
                                sd_partition.role = role
                                sd_partition.sub_role = subrole
                                sd_partition.partition = partition
                                sd_partition.storagedriver = storagedriver
                                sd_partition.mds_service = mds_service
                                sd_partition.size = None
                                sd_partition.number = number + 1
                                sd_partition.save()
                                client = SSHClient(storagedriver.storagerouter, username='******')
                                path = sd_partition.path.rsplit('/', 1)[0]
                                if path:
                                    client.dir_create(path)
                                    client.dir_chown(path, 'ovs', 'ovs')
                                client.dir_create(directory)
                                client.dir_chown(directory, 'ovs', 'ovs')
                                client.symlink({sd_partition.path: directory})

        # Convert the legacy per-StorageDriver mountpoint fields into StorageDriverPartitions
        for storagedriver in StorageDriverList.get_storagedrivers():
            migrated_objects = {}
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                            'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    # Drop empty entries from the legacy data
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                        inode = rem.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            if migrated_objects:
                # Read the configured cache sizes back from the volumedriver config
                # and store them on the freshly created StorageDriverPartitions
                print 'Loading sizes'
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                config.load()
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()

        working_version = 4

    # Version 5 introduced:
    # - Failure Domains
    if working_version < 5:
        import os
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.lists.failuredomainlist import FailureDomainList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient

        failure_domains = FailureDomainList.get_failure_domains()
        if len(failure_domains) > 0:
            failure_domain = failure_domains[0]
        else:
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()
        for storagerouter in StorageRouterList.get_storagerouters():
            change = False
            if storagerouter.primary_failure_domain is None:
                storagerouter.primary_failure_domain = failure_domain
                change = True
            if storagerouter.rdma_capable is None:
                # Detect RDMA capability by scanning /sys/class/infiniband for ACTIVE ports
                client = SSHClient(storagerouter, username='******')
                rdma_capable = False
                with remote(client.ip, [os], username='******') as rem:
                    for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                        for directory in dirs:
                            ports_dir = '/'.join([root, directory, 'ports'])
                            if not rem.os.path.exists(ports_dir):
                                continue
                            for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                if sub_root != ports_dir:
                                    continue
                                for sub_directory in sub_dirs:
                                    state_file = '/'.join([sub_root, sub_directory, 'state'])
                                    if rem.os.path.exists(state_file):
                                        if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                            rdma_capable = True
                storagerouter.rdma_capable = rdma_capable
                change = True
            if change is True:
                storagerouter.save()
        working_version = 5

    # Version 6 introduced:
    # - Distributed scrubbing
    if working_version < 6:
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.sshclient import SSHClient
        for storage_driver in StorageDriverList.get_storagedrivers():
            root_client = SSHClient(storage_driver.storagerouter, username='******')
            for partition in storage_driver.partitions:
                if partition.role == DiskPartition.ROLES.SCRUB:
                    old_path = partition.path
                    # Clearing the sub_role changes the dynamic 'folder'/'path' properties
                    partition.sub_role = None
                    partition.save()
                    partition.invalidate_dynamics(['folder', 'path'])
                    if root_client.dir_exists(partition.path):
                        continue  # New directory already exists
                    if '_mds_' in old_path:
                        if root_client.dir_exists(old_path):
                            root_client.symlink({partition.path: old_path})
                    if not root_client.dir_exists(partition.path):
                        root_client.dir_create(partition.path)
                    root_client.dir_chmod(partition.path, 0777)
        working_version = 6

    # Version 7 introduced:
    # - vPool status
    if working_version < 7:
        # Reload the hybrid module so the new 'status' property is picked up
        from ovs.dal.hybrids import vpool
        reload(vpool)
        from ovs.dal.hybrids.vpool import VPool
        from ovs.dal.lists.vpoollist import VPoolList
        for _vpool in VPoolList.get_vpools():
            vpool = VPool(_vpool.guid)
            if hasattr(vpool, 'status') and vpool.status is None:
                vpool.status = VPool.STATUSES.RUNNING
                vpool.save()
        working_version = 7

    # Version 10 introduced:
    # - Reverse indexes are stored in persistent store
    # - Store more non-changing metadata on disk iso using a dynamic property
    if working_version < 10:
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.datalist import DataList
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        from ovs.extensions.storage.volatilefactory import VolatileFactory
        persistent = PersistentFactory.get_client()
        for prefix in ['ovs_listcache', 'ovs_reverseindex']:
            for key in persistent.prefix(prefix):
                persistent.delete(key)
        # Re-set every data key so it is rewritten in the current storage format
        for key in persistent.prefix('ovs_data_'):
            persistent.set(key, persistent.get(key))
        base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
        hybrid_structure = HybridRunner.get_hybrids()
        # Build reverse-index entries for every relation of every hybrid object
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                         'items': []})
            for item in all_objects:
                guid = item.guid
                for relation in item._relations:
                    if relation.foreign_type is None:
                        rcls = cls
                        rclsname = rcls.__name__.lower()
                    else:
                        rcls = relation.foreign_type
                        rclsname = rcls.__name__.lower()
                    key = relation.name
                    rguid = item._data[key]['guid']
                    if rguid is not None:
                        reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                        persistent.set(reverse_key, 0)
        volatile = VolatileFactory.get_client()
        try:
            volatile._client.flush_all()
        except:
            pass
        # Persist previously dynamic vdisk metadata; best-effort per vdisk
        from ovs.dal.lists.vdisklist import VDiskList
        for vdisk in VDiskList.get_vdisks():
            try:
                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                vdisk.save()
            except:
                pass
        working_version = 10

    # Version 11 introduced:
    # - ALBA accelerated ALBA, meaning different vpool.metadata information
    if working_version < 11:
        from ovs.dal.lists.vpoollist import VPoolList
        for vpool in VPoolList.get_vpools():
            # Nest the old metadata under a 'backend' key and normalize its sub-keys
            vpool.metadata = {'backend': vpool.metadata}
            if 'metadata' in vpool.metadata['backend']:
                vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
            if 'backend_info' in vpool.metadata['backend']:
                vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
            vpool.save()
        working_version = 11

    return working_version
def update_vmachine_config(vmachine, vm_object, pmachine=None):
    """
    Update a vMachine configuration with a given vMachine configuration

    :param vmachine: vMachine DAL object to update
    :param vm_object: Hypervisor-sourced dict describing the virtual machine
                      (reads keys: 'name', 'id', 'backing', 'disks', 'datastores')
    :param pmachine: Optional pMachine to (re)link the vMachine to
    :raises Exception: any error during the update is logged and re-raised
    """
    try:
        vdisks_synced = 0
        # Fire create/rename events before the new name is persisted below
        if vmachine.name is None:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_created',
                                    'metadata': {'name': vm_object['name']}})
        elif vmachine.name != vm_object['name']:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_renamed',
                                    'metadata': {'old_name': vmachine.name,
                                                 'new_name': vm_object['name']}})
        if pmachine is not None:
            vmachine.pmachine = pmachine
        vmachine.name = vm_object['name']
        vmachine.hypervisor_id = vm_object['id']
        vmachine.devicename = vm_object['backing']['filename']
        vmachine.save()

        # Updating and linking disks
        storagedrivers = StorageDriverList.get_storagedrivers()
        # Map '<storage_ip>:<mountpoint>' datastore identifiers to their StorageDriver
        datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver)
                           for storagedriver in storagedrivers])
        vdisk_guids = []
        for disk in vm_object['disks']:
            if disk['datastore'] in vm_object['datastores']:
                datastore = vm_object['datastores'][disk['datastore']]
                if datastore in datastores:
                    vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'],
                                                                  datastores[datastore].vpool)
                    if vdisk is None:
                        # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                        vdisk = VDisk()
                        vdisk.vpool = datastores[datastore].vpool
                        vdisk.reload_client()
                        vdisk.devicename = disk['filename']
                        vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                        vdisk.size = vdisk.info['volume_size']
                        MDSServiceController.ensure_safety(vdisk)
                    # Update the disk with information from the hypervisor
                    if vdisk.vmachine is None:
                        MessageController.fire(MessageController.Type.EVENT,
                                               {'type': 'vdisk_attached',
                                                'metadata': {'vmachine_name': vmachine.name,
                                                             'vdisk_name': disk['name']}})
                    vdisk.vmachine = vmachine
                    vdisk.name = disk['name']
                    vdisk.order = disk['order']
                    vdisk.save()
                    vdisk_guids.append(vdisk.guid)
                    vdisks_synced += 1

        # Detach vDisks the hypervisor no longer reports for this machine
        for vdisk in vmachine.vdisks:
            if vdisk.guid not in vdisk_guids:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vdisk_detached',
                                        'metadata': {'vmachine_name': vmachine.name,
                                                     'vdisk_name': vdisk.name}})
                vdisk.vmachine = None
                vdisk.save()

        logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(vmachine.name, vdisks_synced))
    except Exception as ex:
        # Fix: log the failure at error level (was logger.info, which hid real
        # failures in informational output), then re-raise for the caller.
        logger.error('Error during vMachine update: {0}'.format(str(ex)))
        raise
def _bootstrap_dal_models(self):
    """
    Load/hook dal models as snmp oids

    Walks every DAL object type (StorageRouters, VMachines, VDisks,
    PMachines, VPools, StorageDrivers and - when installed - ALBA backends)
    and registers each attribute/statistic as an SNMP OID via
    self._register_dal_model.  Objects that disappeared from the model are
    unregistered afterwards, triggering an SNMP reload.
    Can be disabled by writing a falsy value under the
    '<STORAGE_PREFIX>_config_dal_enabled' persistent key.
    """
    _guids = set()  # guids of every DAL object seen in this pass
    enabled_key = "{0}_config_dal_enabled".format(STORAGE_PREFIX)
    self.instance_oid = 0
    try:
        enabled = self.persistent.get(enabled_key)
    except KeyNotFoundException:
        enabled = True  # Enabled by default, can be disabled by setting the key
    if enabled:
        from ovs.dal.lists.vdisklist import VDiskList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.pmachinelist import PMachineList
        from ovs.dal.lists.vmachinelist import VMachineList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        # StorageRouters under OID branch 10
        for storagerouter in StorageRouterList.get_storagerouters():
            _guids.add(storagerouter.guid)
            if not self._check_added(storagerouter):
                self._register_dal_model(10, storagerouter, 'guid', "0")
                self._register_dal_model(10, storagerouter, 'name', "1")
                self._register_dal_model(10, storagerouter, 'pmachine', "3", key = 'host_status')
                self._register_dal_model(10, storagerouter, 'description', "4")
                self._register_dal_model(10, storagerouter, 'devicename', "5")
                self._register_dal_model(10, storagerouter, 'dtl_mode', "6")
                self._register_dal_model(10, storagerouter, 'ip', "8")
                self._register_dal_model(10, storagerouter, 'machineid', "9")
                self._register_dal_model(10, storagerouter, 'status', "10")
                # NOTE(review): in these three lambdas the filter compares against
                # 'storagedriver', which is the *leaked* loop variable of the inner
                # list comprehension (Python 2 scoping) - after the comprehension it
                # is bound to the LAST storagedriver, so only that driver's id is
                # matched.  Looks unintended - confirm before relying on the counts.
                self._register_dal_model(10, storagerouter, '#vdisks', "11",
                                         func = lambda storagerouter: len([vdisk for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id]),
                                         atype = int)
                self._register_dal_model(10, storagerouter, '#vmachines', "12",
                                         func = lambda storagerouter: len(set([vdisk.vmachine.guid for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id])),
                                         atype = int)
                self._register_dal_model(10, storagerouter, '#stored_data', "13",
                                         func = lambda storagerouter: sum([vdisk.vmachine.stored_data for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id]),
                                         atype = int)
                self.instance_oid += 1
        # vTemplates under OID branch 11
        for vm in VMachineList.get_vmachines():
            _guids.add(vm.guid)
            if not self._check_added(vm):
                if vm.is_vtemplate:
                    self._register_dal_model(11, vm, 'guid', "0")
                    self._register_dal_model(11, vm, 'name', "1")

                    def _children(vmt):
                        # Count vdisks of non-template vmachines whose parent
                        # vdisk belongs to this template.
                        children = 0
                        disks = [vd.guid for vd in vmt.vdisks]
                        for vdisk in [vdisk.parent_vdisk_guid for item in [vm.vdisks for vm in VMachineList.get_vmachines() if not vm.is_vtemplate] for vdisk in item]:
                            for disk in disks:
                                if vdisk == disk:
                                    children += 1
                        return children
                    self._register_dal_model(11, vm, '#children', 2, func = _children, atype = int)
                    self.instance_oid += 1
        # Regular (non-template) vMachines under OID branch 0
        for vm in VMachineList.get_vmachines():
            _guids.add(vm.guid)
            if not self._check_added(vm):
                if not vm.is_vtemplate:
                    self._register_dal_model(0, vm, 'guid', "0")
                    self._register_dal_model(0, vm, 'name', "1")
                    # statistics sub-tree 2.x - one OID per counter key
                    self._register_dal_model(0, vm, 'statistics', "2.0", key = "operations", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.2", key = "data_read", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.6", key = "write_operations", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.11", key = "backend_data_read", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.12", key = "cache_hits", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.16", key = "backend_data_written", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.17", key = "data_read_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.18", key = "read_operations", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.20", key = "data_written_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.23", key = "timestamp", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.27", key = "data_written", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.30", key = "operations_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                    self._register_dal_model(0, vm, 'statistics', "2.34", key = "data_transferred", atype = int)
                    self._register_dal_model(0, vm, 'stored_data', "3", atype = int)
                    self._register_dal_model(0, vm, 'description', "4")
                    self._register_dal_model(0, vm, 'devicename', "5")
                    self._register_dal_model(0, vm, 'dtl_mode', "6")
                    self._register_dal_model(0, vm, 'hypervisorid', "7")
                    self._register_dal_model(0, vm, 'ip', "8")
                    self._register_dal_model(0, vm, 'status', "10")
                    # NOTE(review): OID "10" is registered twice (status above and
                    # stored_data here) - the later registration presumably wins or
                    # collides; verify intended OID numbering.
                    self._register_dal_model(0, vm, 'stored_data', "10", atype = int)
                    self._register_dal_model(0, vm, 'snapshots', "11", atype = int)
                    self._register_dal_model(0, vm, 'vdisks', "12", atype = int)
                    self._register_dal_model(0, vm, 'DTL', '13', func = lambda vm: 'DEGRADED' if all(item == 'DEGRADED' for item in [vd.info['failover_mode'] for vd in vm.vdisks]) else 'OK')
                    self.instance_oid += 1
        # vDisks under OID branch 1
        for vd in VDiskList.get_vdisks():
            _guids.add(vd.guid)
            if not self._check_added(vd):
                self._register_dal_model(1, vd, 'guid', "0")
                self._register_dal_model(1, vd, 'name', "1")
                self._register_dal_model(1, vd, 'statistics', "2.0", key = "operations", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.1", key = "data_written_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.2", key = "data_read", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.6", key = "write_operations", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.11", key = "backend_data_read", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.12", key = "cache_hits", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.16", key = "backend_data_written", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.17", key = "data_read_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.18", key = "read_operations", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.20", key = "cluster_cache_misses_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.23", key = "timestamp", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.27", key = "data_written", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.30", key = "operations_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                self._register_dal_model(1, vd, 'statistics', "2.34", key = "data_transferred", atype = int)
                self._register_dal_model(1, vd, 'info', "3", key = 'stored', atype = int)
                self._register_dal_model(1, vd, 'info', "4", key = 'failover_mode', atype = int)
                self._register_dal_model(1, vd, 'snapshots', "5", atype = int)
                self.instance_oid += 1
        # pMachines under OID branch 2
        for pm in PMachineList.get_pmachines():
            _guids.add(pm.guid)
            if not self._check_added(pm):
                self._register_dal_model(2, pm, 'guid', "0")
                self._register_dal_model(2, pm, 'name', "1")
                self._register_dal_model(2, pm, 'host_status', "2")
                self.instance_oid += 1
        # vPools under OID branch 3
        for vp in VPoolList.get_vpools():
            _guids.add(vp.guid)
            if not self._check_added(vp):
                self._register_dal_model(3, vp, 'guid', "0")
                self._register_dal_model(3, vp, 'name', "1")
                self._register_dal_model(3, vp, 'statistics', "2.0", key = "operations", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.2", key = "data_read", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.6", key = "write_operations", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.11", key = "backend_data_read", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.12", key = "cache_hits", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.16", key = "backend_data_written", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.17", key = "data_read_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.18", key = "read_operations", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.20", key = "data_written_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.23", key = "timestamp", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.27", key = "data_written", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.30", key = "operations_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                self._register_dal_model(3, vp, 'statistics', "2.34", key = "data_transferred", atype = int)
                self._register_dal_model(3, vp, 'status', "3")
                self._register_dal_model(3, vp, 'description', "4")
                self._register_dal_model(3, vp, 'vdisks', "5", atype = int)
                self._register_dal_model(3, vp, '#vmachines', "6",
                                         func = lambda vp: len(set([vd.vmachine.guid for vd in vp.vdisks])),
                                         atype = int)
                self.instance_oid += 1
        # StorageDrivers under OID branch 4
        for storagedriver in StorageDriverList.get_storagedrivers():
            _guids.add(storagedriver.guid)
            if not self._check_added(storagedriver):
                self._register_dal_model(4, storagedriver, 'guid', "0")
                self._register_dal_model(4, storagedriver, 'name', "1")
                self._register_dal_model(4, storagedriver, 'stored_data', "2", atype = int)
                self.instance_oid += 1
        try:
            # try to load OVS Backends; optional component, hence the ImportError guard
            from ovs.dal.lists.albabackendlist import AlbaBackendList
            for backend in AlbaBackendList.get_albabackends():
                _guids.add(backend.guid)
                if not self._check_added(backend):
                    self._register_dal_model(5, backend, 'guid', 0)
                    self._register_dal_model(5, backend, 'name', 1)
                    # one 2.<disk_id>.x sub-tree per physical disk of the backend
                    for disk_id in range(len((backend.all_disks))):
                        self._register_dal_model(5, backend, 'all_disks', '2.{0}.0'.format(disk_id), key = "name", index=disk_id)
                        self._register_dal_model(5, backend, 'all_disks', '2.{0}.1'.format(disk_id), key = "usage.size", atype = long, index=disk_id)
                        self._register_dal_model(5, backend, 'all_disks', '2.{0}.2'.format(disk_id), key = "usage.used", atype = long, index=disk_id)
                        self._register_dal_model(5, backend, 'all_disks', '2.{0}.3'.format(disk_id), key = "usage.available", atype = long, index=disk_id)
                        self._register_dal_model(5, backend, 'all_disks', '2.{0}.4'.format(disk_id), key = "state.state", index=disk_id)
                        self._register_dal_model(5, backend, 'all_disks', '2.{0}.5'.format(disk_id), key = "node_id", index=disk_id)
                    self.instance_oid += 1
        except ImportError:
            print('OVS Backend not present')
            pass
    # Unregister objects that vanished from the model since the last pass.
    # NOTE(review): 'reload' shadows the builtin of the same name within this scope.
    reload = False
    for object_guid in list(self.model_oids):
        if object_guid not in _guids:
            self.model_oids.remove(object_guid)
            reload = True
    if reload:
        self._reload_snmp()
def update_vmachine_config(vmachine, vm_object, pmachine=None):
    """
    Update a vMachine configuration with a given vMachine configuration

    :param vmachine: vMachine DAL object to update
    :param vm_object: Hypervisor-sourced dict describing the virtual machine
                      (reads keys: 'name', 'id', 'backing', 'disks', 'datastores')
    :param pmachine: Optional pMachine to (re)link the vMachine to
    :raises Exception: any error during the update is logged and re-raised
    """
    try:
        vdisks_synced = 0
        # Fire create/rename events before the new name is persisted below
        if vmachine.name is None:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_created',
                                    'metadata': {'name': vm_object['name']}})
        elif vmachine.name != vm_object['name']:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_renamed',
                                    'metadata': {'old_name': vmachine.name,
                                                 'new_name': vm_object['name']}})
        if pmachine is not None:
            vmachine.pmachine = pmachine
        vmachine.name = vm_object['name']
        vmachine.hypervisor_id = vm_object['id']
        vmachine.devicename = vm_object['backing']['filename']
        vmachine.save()

        # Updating and linking disks
        storagedrivers = StorageDriverList.get_storagedrivers()
        # Map '<storage_ip>:<mountpoint>' datastore identifiers to their StorageDriver
        datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver)
                           for storagedriver in storagedrivers])
        vdisk_guids = []
        for disk in vm_object['disks']:
            if disk['datastore'] in vm_object['datastores']:
                datastore = vm_object['datastores'][disk['datastore']]
                if datastore in datastores:
                    vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'],
                                                                  datastores[datastore].vpool)
                    if vdisk is None:
                        # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                        vdisk = VDisk()
                        vdisk.vpool = datastores[datastore].vpool
                        vdisk.reload_client()
                        vdisk.devicename = disk['filename']
                        vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                        vdisk.size = vdisk.info['volume_size']
                    # Update the disk with information from the hypervisor
                    if vdisk.vmachine is None:
                        MessageController.fire(MessageController.Type.EVENT,
                                               {'type': 'vdisk_attached',
                                                'metadata': {'vmachine_name': vmachine.name,
                                                             'vdisk_name': disk['name']}})
                    vdisk.vmachine = vmachine
                    vdisk.name = disk['name']
                    vdisk.order = disk['order']
                    vdisk.save()
                    vdisk_guids.append(vdisk.guid)
                    vdisks_synced += 1

        # Detach vDisks the hypervisor no longer reports for this machine
        for vdisk in vmachine.vdisks:
            if vdisk.guid not in vdisk_guids:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vdisk_detached',
                                        'metadata': {'vmachine_name': vmachine.name,
                                                     'vdisk_name': vdisk.name}})
                vdisk.vmachine = None
                vdisk.save()

        logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(
            vmachine.name, vdisks_synced
        ))
    except Exception as ex:
        # Fix: log the failure at error level (was logger.info, which hid real
        # failures in informational output), then re-raise for the caller.
        logger.error('Error during vMachine update: {0}'.format(str(ex)))
        raise
def gather_scrub_work():
    """
    Retrieve and execute scrub work

    Finds every StorageRouter with a reachable SCRUB partition, collects all
    BASE vdisks, partitions them evenly over the scrub locations and launches
    the scrub work (remote routers via async Celery tasks, the local router
    in-process).  Raises when no scrub location exists or when any vdisk was
    not successfully scrubbed.
    :return: None
    """
    logger.info('Gather Scrub - Started')
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        # Connectivity check only; the client itself is not needed
                        _ = SSHClient(storage_driver.storagerouter)
                        scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError('No scrub locations found')

    # Only BASE volumes (no clones/templates) are eligible for scrubbing
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] == 'BASE':
            vdisk_guids.add(vdisk.guid)

    logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_storage_router = None
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []

    # Fix: materialize the set ONCE before partitioning. The original called
    # list(vdisk_guids) inside the loop, making disjoint slices depend on the
    # (implementation-specific) stability of set iteration order.
    all_vdisk_guids = list(vdisk_guids)
    for index, scrub_info in enumerate(scrub_locations.items()):
        # Evenly partition the guid list over the scrub locations
        start_index = index * len(vdisk_guids) / len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = all_vdisk_guids[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

        if local is True:
            # Local work is executed after all remote tasks have been launched
            local_storage_router = storage_router
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set.add(ScheduledTaskController._execute_scrub_work.s(
                scrub_location=scrub_info[1],
                vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id)))
            storage_router_list.append(storage_router)

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    processed_guids = []
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        try:
            processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                          vdisk_guids=local_vdisks_to_scrub)
        except Exception as ex:
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
    all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    for index, result in enumerate(all_results):
        if isinstance(result, list):
            processed_guids.extend(result)
        else:
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))
    if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
    logger.info('Gather Scrub - Finished')
def migrate(previous_version):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    Each step is guarded by ``if working_version < N`` and leaves
    ``working_version`` at N, so steps are idempotent per run.
    @param previous_version: The previous version from which to start the migration.
    @return: The version reached by this migrator
    """
    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        admin = User()
        admin.username = '******'
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        # 128-character random secret for the client-credentials grant
        admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [
            (admin_group, [read_role, write_role, manage_role]),
            (viewers_group, [read_role])
        ]
        for setting in mapping:
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            # Propagate the group's roles to the clients of its users
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends (create only the backend types not present yet)
        for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                  ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in ['MetadataServer', 'AlbaProxy', 'Arakoon']:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Brandings
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Only rewrite descriptors that point into the 'hybrids' source tree
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    if not filename.startswith('/'):
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    # Re-import the hybrid class and regenerate its descriptor
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Same regeneration as version 2, but for every descriptor
                # carrying a 'source', using the stored (absolute) path as-is
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.extensions.generic.remote import Remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration

        # Part 1: migrate MDS directories to StorageDriverPartition objects
        for service in ServiceTypeList.get_by_name('MetadataServer').services:
            mds_service = service.mds_service
            # Find the StorageDriver on this Storage Router serving the same vPool
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            # tasks: directory path -> (role, subrole) to model for it
            # NOTE(review): if no matching StorageDriver was found,
            # storagedriver stays None and the _data access below would fail
            tasks = {}
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote:
                            # Walk up until an existing directory is found, so
                            # st_dev identifies the partition hosting 'directory'
                            stat_dir = directory
                            while not remote.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            inode = remote.os.stat(stat_dir).st_dev
                            if partition.inode == inode:
                                if role not in partition.roles:
                                    partition.roles.append(role)
                                    partition.save()
                                # Reuse an existing model entry when this MDS
                                # service was already migrated; otherwise pick
                                # the next free number for this partition
                                number = 0
                                migrated = False
                                for sd_partition in storagedriver.partitions:
                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                        if sd_partition.mds_service == mds_service:
                                            migrated = True
                                            break
                                        if sd_partition.partition_guid == partition.guid:
                                            number = max(sd_partition.number, number)
                                if migrated is False:
                                    sd_partition = StorageDriverPartition()
                                    sd_partition.role = role
                                    sd_partition.sub_role = subrole
                                    sd_partition.partition = partition
                                    sd_partition.storagedriver = storagedriver
                                    sd_partition.mds_service = mds_service
                                    sd_partition.size = None
                                    sd_partition.number = number + 1
                                    sd_partition.save()
                                    # Create the model path and link it to the
                                    # pre-existing on-disk directory
                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                    path = sd_partition.path.rsplit('/', 1)[0]
                                    if path:
                                        client.dir_create(path)
                                        client.dir_chown(path, 'ovs', 'ovs')
                                    client.dir_create(directory)
                                    client.dir_chown(directory, 'ovs', 'ovs')
                                    client.symlink({sd_partition.path: directory})

        # Part 2: migrate the legacy mountpoint_* fields of every StorageDriver
        for storagedriver in StorageDriverList.get_storagedrivers():
            migrated_objects = {}
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                            'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            # Iterate over a copy, since entries get removed below
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    # Drop empty entries / keys outright
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote:
                                        inode = remote.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                # Some subroles live in a vPool-named
                                                # subfolder of the old mountpoint
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        # Old mountpoint handled: remove it from _data
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            # Legacy 'mountpoint_bfs' becomes the mountpoint_dfs property
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            # Backfill sizes for the migrated cache partitions from the live
            # storagedriver configuration
            if migrated_objects:
                print 'Loading sizes'
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool.name)
                config.load(SSHClient(storagedriver.storagerouter, username='******'))
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()

        working_version = 4

    return working_version
def gather_scrub_work():
    """
    Retrieve and execute scrub work.

    Builds the set of reachable Storage Routers holding a SCRUB partition,
    distributes all 'BASE' vDisks evenly over them, runs the local share in
    this process and the remote shares as routed Celery tasks.
    :return: None
    :raises RuntimeError: when no scrub location is reachable, or when one or
                          more Storage Routers failed to scrub their chunk
    """
    logger.info('Gather Scrub - Started')

    # One scrub path per Storage Router; constructing SSHClient merely probes
    # reachability (the client object itself is discarded).
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        _ = SSHClient(storage_driver.storagerouter)
                        scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError('No scrub locations found')

    # Only 'BASE' vDisks are scrub candidates (attached or machine-less)
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] == 'BASE':
            vdisk_guids.add(vdisk.guid)

    logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_storage_router = None
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = ResultSet([])
    storage_router_list = []

    # Slice the guid list into contiguous, near-equal chunks (Python 2
    # integer division). NOTE(review): relies on stable set iteration order
    # across the repeated list(vdisk_guids) calls within this process.
    for index, scrub_info in enumerate(scrub_locations.items()):
        start_index = index * len(vdisk_guids) / len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

        if local is True:
            # Keep the local chunk for in-process execution after dispatching
            local_storage_router = storage_router
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            # Route to the Celery worker on the owning Storage Router
            result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                         vdisk_guids=vdisk_guids_to_scrub).apply_async(
                routing_key='sr.{0}'.format(storage_router.machine_id)
            ))
            storage_router_list.append(storage_router)

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    processed_guids = []
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        try:
            processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                          vdisk_guids=local_vdisks_to_scrub)
        except Exception as ex:
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
    all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
    # Successful tasks return the list of processed guids; any other result
    # (an exception instance) marks that Storage Router as failed
    for index, result in enumerate(all_results):
        if isinstance(result, list):
            processed_guids.extend(result)
        else:
            logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))
    # The processed guids must exactly cover what was distributed
    if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
    logger.info('Gather Scrub - Finished')
def migrate(previous_version):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    Each step is guarded by ``if working_version < N`` and raises
    ``working_version`` to N, so the steps run at most once per datastore.
    @param previous_version: The previous version from which to start the migration.
    @return: The version reached by this migrator
    """
    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        admin = User()
        admin.username = '******'
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        # 128-character random secret for the client-credentials grant
        admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [
            (admin_group, [read_role, write_role, manage_role]),
            (viewers_group, [read_role])
        ]
        for setting in mapping:
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            # Propagate the group's roles to the clients of its users
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends (create only the backend types not present yet)
        for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                  ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Branding
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # Failure Domain
        failure_domain = FailureDomain()
        failure_domain.name = 'Default'
        failure_domain.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Only rewrite descriptors pointing into the 'hybrids' source tree
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    if not filename.startswith('/'):
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    # Re-import the hybrid class and regenerate its descriptor
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Same regeneration as version 2, but for every descriptor
                # carrying a 'source', using the stored path as-is
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration

        # Part 1: migrate MDS directories to StorageDriverPartition objects
        for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
            mds_service = service.mds_service
            # Find the StorageDriver on this Storage Router serving the same vPool
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            # tasks: directory path -> (role, subrole) to model for it
            # NOTE(review): if no matching StorageDriver was found,
            # storagedriver stays None and the _data access below would fail
            tasks = {}
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                            # Walk up to the nearest existing directory so
                            # st_dev identifies the hosting partition
                            stat_dir = directory
                            while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            inode = rem.os.stat(stat_dir).st_dev
                            if partition.inode == inode:
                                if role not in partition.roles:
                                    partition.roles.append(role)
                                    partition.save()
                                # Reuse an existing model entry when this MDS
                                # service was already migrated; otherwise pick
                                # the next free number for this partition
                                number = 0
                                migrated = False
                                for sd_partition in storagedriver.partitions:
                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                        if sd_partition.mds_service == mds_service:
                                            migrated = True
                                            break
                                        if sd_partition.partition_guid == partition.guid:
                                            number = max(sd_partition.number, number)
                                if migrated is False:
                                    sd_partition = StorageDriverPartition()
                                    sd_partition.role = role
                                    sd_partition.sub_role = subrole
                                    sd_partition.partition = partition
                                    sd_partition.storagedriver = storagedriver
                                    sd_partition.mds_service = mds_service
                                    sd_partition.size = None
                                    sd_partition.number = number + 1
                                    sd_partition.save()
                                    # Create the model path and link it to the
                                    # pre-existing on-disk directory
                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                    path = sd_partition.path.rsplit('/', 1)[0]
                                    if path:
                                        client.dir_create(path)
                                        client.dir_chown(path, 'ovs', 'ovs')
                                    client.dir_create(directory)
                                    client.dir_chown(directory, 'ovs', 'ovs')
                                    client.symlink({sd_partition.path: directory})

        # Part 2: migrate the legacy mountpoint_* fields of every StorageDriver
        for storagedriver in StorageDriverList.get_storagedrivers():
            migrated_objects = {}
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                            'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            # Iterate over a copy, since entries get removed below
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    # Drop empty entries / keys outright
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                        inode = rem.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                # Some subroles live in a vPool-named
                                                # subfolder of the old mountpoint
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        # Old mountpoint handled: remove it from _data
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            # Legacy 'mountpoint_bfs' becomes the mountpoint_dfs property
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            # Backfill sizes for the migrated cache partitions from the live
            # storagedriver configuration
            if migrated_objects:
                print 'Loading sizes'
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                config.load()
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()

        working_version = 4

    # Version 5 introduced:
    # - Failure Domains
    if working_version < 5:
        import os
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.lists.failuredomainlist import FailureDomainList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient

        # Reuse the first existing failure domain, or create a default one
        failure_domains = FailureDomainList.get_failure_domains()
        if len(failure_domains) > 0:
            failure_domain = failure_domains[0]
        else:
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()
        for storagerouter in StorageRouterList.get_storagerouters():
            change = False
            if storagerouter.primary_failure_domain is None:
                storagerouter.primary_failure_domain = failure_domain
                change = True
            # Detect RDMA capability by scanning /sys/class/infiniband for a
            # port whose 'state' file reports ACTIVE
            if storagerouter.rdma_capable is None:
                client = SSHClient(storagerouter, username='******')
                rdma_capable = False
                with remote(client.ip, [os], username='******') as rem:
                    for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                        for directory in dirs:
                            ports_dir = '/'.join([root, directory, 'ports'])
                            if not rem.os.path.exists(ports_dir):
                                continue
                            for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                if sub_root != ports_dir:
                                    continue
                                for sub_directory in sub_dirs:
                                    state_file = '/'.join([sub_root, sub_directory, 'state'])
                                    if rem.os.path.exists(state_file):
                                        if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                            rdma_capable = True
                storagerouter.rdma_capable = rdma_capable
                change = True
            if change is True:
                storagerouter.save()

        working_version = 5

    # Version 6 introduced:
    # - Distributed scrubbing
    if working_version < 6:
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.sshclient import SSHClient

        for storage_driver in StorageDriverList.get_storagedrivers():
            root_client = SSHClient(storage_driver.storagerouter, username='******')
            for partition in storage_driver.partitions:
                if partition.role == DiskPartition.ROLES.SCRUB:
                    # Clearing sub_role changes the dynamic 'path'; keep the
                    # old path so the old directory can be symlinked below
                    old_path = partition.path
                    partition.sub_role = None
                    partition.save()
                    partition.invalidate_dynamics(['folder', 'path'])
                    if root_client.dir_exists(partition.path):
                        continue  # New directory already exists
                    if '_mds_' in old_path:
                        if root_client.dir_exists(old_path):
                            root_client.symlink({partition.path: old_path})
                    if not root_client.dir_exists(partition.path):
                        root_client.dir_create(partition.path)
                        root_client.dir_chmod(partition.path, 0777)

        working_version = 6

    # Version 7 introduced:
    # - vPool status
    if working_version < 7:
        from ovs.dal.hybrids import vpool
        reload(vpool)
        from ovs.dal.hybrids.vpool import VPool
        from ovs.dal.lists.vpoollist import VPoolList
        for _vpool in VPoolList.get_vpools():
            # Re-instantiate against the reloaded hybrid class
            vpool = VPool(_vpool.guid)
            if hasattr(vpool, 'status') and vpool.status is None:
                vpool.status = VPool.STATUSES.RUNNING
                vpool.save()

        working_version = 7

    # Version 10 introduced:
    # - Reverse indexes are stored in persistent store
    # - Store more non-changing metadata on disk iso using a dynamic property
    if working_version < 10:
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.datalist import DataList
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        from ovs.extensions.storage.volatilefactory import VolatileFactory

        persistent = PersistentFactory.get_client()
        # Drop stale list caches and reverse indexes; re-setting every data
        # key rewrites it through the current serialisation
        for prefix in ['ovs_listcache', 'ovs_reverseindex']:
            for key in persistent.prefix(prefix):
                persistent.delete(key)
        for key in persistent.prefix('ovs_data_'):
            persistent.set(key, persistent.get(key))
        # Rebuild all reverse-index keys from the relations of every hybrid
        base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                         'items': []})
            for item in all_objects:
                guid = item.guid
                for relation in item._relations:
                    if relation.foreign_type is None:
                        # Self-referencing relation
                        rcls = cls
                        rclsname = rcls.__name__.lower()
                    else:
                        rcls = relation.foreign_type
                        rclsname = rcls.__name__.lower()
                    key = relation.name
                    rguid = item._data[key]['guid']
                    if rguid is not None:
                        reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                        persistent.set(reverse_key, 0)
        # Best-effort flush of the volatile cache
        volatile = VolatileFactory.get_client()
        try:
            volatile._client.flush_all()
        except:
            pass
        # Persist the previously-dynamic vDisk metadata; best-effort per disk
        from ovs.dal.lists.vdisklist import VDiskList
        for vdisk in VDiskList.get_vdisks():
            try:
                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                vdisk.save()
            except:
                pass

        working_version = 10

    # Version 11 introduced:
    # - ALBA accelerated ALBA, meaning different vpool.metadata information
    if working_version < 11:
        from ovs.dal.lists.vpoollist import VPoolList
        for vpool in VPoolList.get_vpools():
            # Nest the old metadata under a 'backend' key and rename/extend fields
            vpool.metadata = {'backend': vpool.metadata}
            if 'metadata' in vpool.metadata['backend']:
                vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
            if 'backend_info' in vpool.metadata['backend']:
                vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
            vpool.save()
        working_version = 11

    return working_version
def update_vmachine_config(vmachine, vm_object, pmachine=None):
    """
    Update a vMachine configuration with a given vMachine configuration
    :param vmachine: Virtual Machine to update
    :param vm_object: New virtual machine info
    :param pmachine: Physical machine of the virtual machine
    """
    try:
        vdisks_synced = 0
        # Fire created/renamed events before persisting the new name
        if vmachine.name is None:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_created',
                                    'metadata': {'name': vm_object['name']}})
        elif vmachine.name != vm_object['name']:
            MessageController.fire(MessageController.Type.EVENT,
                                   {'type': 'vmachine_renamed',
                                    'metadata': {'old_name': vmachine.name,
                                                 'new_name': vm_object['name']}})
        if pmachine is not None:
            vmachine.pmachine = pmachine
        vmachine.name = vm_object['name']
        vmachine.hypervisor_id = vm_object['id']
        vmachine.devicename = vm_object['backing']['filename']
        vmachine.save()
        # Updating and linking disks
        storagedrivers = StorageDriverList.get_storagedrivers()
        # Datastore identity is '<storage_ip>:<mountpoint>' -> StorageDriver
        datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver)
                           for storagedriver in storagedrivers])
        vdisk_guids = []
        mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
        for disk in vm_object['disks']:
            ensure_safety = False
            if disk['datastore'] in vm_object['datastores']:
                datastore = vm_object['datastores'][disk['datastore']]
                if datastore in datastores:
                    try:
                        # NOTE(review): acquire() is inside the try, so a failed/timed-out
                        # acquire still triggers release() in the finally — confirm the
                        # mutex implementation tolerates releasing an unheld lock
                        mutex.acquire(wait=10)
                        vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                        if vdisk is None:
                            # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                            vdisk = VDisk()
                            vdisk.vpool = datastores[datastore].vpool
                            vdisk.reload_client()
                            vdisk.devicename = disk['filename']
                            vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                            vdisk.size = vdisk.info['volume_size']
                            vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                              'cluster_multiplier': vdisk.info['cluster_multiplier']}
                            # Create the disk in a locked context, but don't execute long running-task in same context
                            vdisk.save()
                            ensure_safety = True
                    finally:
                        mutex.release()
                    # Long-running safety/DTL work happens outside the lock
                    if ensure_safety:
                        MDSServiceController.ensure_safety(vdisk)
                        VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)
                    # Update the disk with information from the hypervisor
                    if vdisk.vmachine is None:
                        MessageController.fire(MessageController.Type.EVENT,
                                               {'type': 'vdisk_attached',
                                                'metadata': {'vmachine_name': vmachine.name,
                                                             'vdisk_name': disk['name']}})
                    vdisk.vmachine = vmachine
                    vdisk.name = disk['name']
                    vdisk.order = disk['order']
                    vdisk.save()
                    vdisk_guids.append(vdisk.guid)
                    vdisks_synced += 1
        # Detach any modeled vDisk the hypervisor no longer reports for this vMachine
        for vdisk in vmachine.vdisks:
            if vdisk.guid not in vdisk_guids:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vdisk_detached',
                                        'metadata': {'vmachine_name': vmachine.name,
                                                     'vdisk_name': vdisk.name}})
                vdisk.vmachine = None
                vdisk.save()
        VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
            vmachine.name, vdisks_synced
        ))
    except Exception as ex:
        VMachineController._logger.info('Error during vMachine update: {0}'.format(str(ex)))
        raise
def verify_vdisk_cache_quota():
    """
    Validates whether the caching quota is reaching its limits or has surpassed it
    Each vDisk can consume a part of the total fragment caching capacity

    Logs an OVS_WARNING at >70% requested capacity and an error at >100%
    (over-allocation) per ALBA Backend used for fragment or block caching.
    :return: None
    """
    MonitoringController._logger.info('Starting vDisk caching quota verification...')
    # Per ALBA Backend guid: its total capacity and the cache size requested from it
    alba_guid_size_map = {}
    for storagedriver in StorageDriverList.get_storagedrivers():
        storagedriver.invalidate_dynamics(['vpool_backend_info', 'vdisks_guids'])
        # Two cache flavours share the same processing: fragment cache and block cache.
        # Each entry: [quota key, backend-info key, connection-info key, vdisk cache_quota key]
        for cache_type in [['cache_quota_fc', 'backend_info', 'connection_info', 'fragment'],
                           ['cache_quota_bc', 'block_cache_backend_info', 'block_cache_connection_info', 'block']]:
            cache_quota = storagedriver.vpool_backend_info[cache_type[0]]
            backend_info = storagedriver.vpool_backend_info[cache_type[1]]
            connection_info = storagedriver.vpool_backend_info[cache_type[2]]
            if backend_info is None or connection_info is None:
                continue  # This cache flavour is not backed by an ALBA Backend
            alba_backend_name = backend_info['name']
            alba_backend_host = connection_info['host']
            alba_backend_guid = backend_info['alba_backend_guid']
            if alba_backend_guid not in alba_guid_size_map:
                # Fetch the Backend's total usage once per guid via the (possibly remote) API
                ovs_client = OVSClient(ip=alba_backend_host,
                                       port=connection_info['port'],
                                       credentials=(connection_info['client_id'], connection_info['client_secret']),
                                       version=2,
                                       cache_store=VolatileFactory.get_client())
                try:
                    alba_guid_size_map[alba_backend_guid] = {'name': alba_backend_name,
                                                             'backend_ip': alba_backend_host,
                                                             'total_size': ovs_client.get('/alba/backends/{0}/'.format(alba_backend_guid),
                                                                                          params={'contents': 'usages'})['usages']['size'],
                                                             'requested_size': 0}
                except Exception:
                    MonitoringController._logger.exception('Failed to retrieve ALBA Backend info for {0} on host {1}'.format(alba_backend_name, alba_backend_host))
                    continue
            # Sum what every vDisk may claim: its own quota if set, else the vPool default
            for vdisk_guid in storagedriver.vdisks_guids:
                vdisk = VDisk(vdisk_guid)
                vdisk_cq = vdisk.cache_quota.get(cache_type[3]) if vdisk.cache_quota is not None else None
                if vdisk_cq is None:
                    alba_guid_size_map[alba_backend_guid]['requested_size'] += cache_quota if cache_quota is not None else 0
                else:
                    alba_guid_size_map[alba_backend_guid]['requested_size'] += vdisk_cq

    local_ips = [sr.ip for sr in StorageRouterList.get_storagerouters()]
    for alba_backend_info in alba_guid_size_map.itervalues():
        name = alba_backend_info['name']
        backend_ip = alba_backend_info['backend_ip']
        location = 'local'
        remote_msg = ''
        if backend_ip not in local_ips:
            location = 'remote'
            remote_msg = ' (on remote IP {0})'.format(backend_ip)
        total_size = alba_backend_info['total_size']
        if not total_size:
            # Guard: a Backend reporting 0 (or missing) capacity would raise ZeroDivisionError
            MonitoringController._logger.warning('OVS_WARNING: {0} ALBA Backend {1} reports no capacity{2}. Skipping quota check'.format(location, name, remote_msg))
            continue
        # Use float arithmetic: under Python 2 'requested / total * 100' floor-divides
        # to 0 or 1 first, which silently defeated both thresholds below
        percentage = alba_backend_info['requested_size'] * 100.0 / total_size
        if percentage > 100:
            MonitoringController._logger.error('OVS_WARNING: Over-allocation for vDisk caching quota on {0} ALBA Backend {1}{2}. Unexpected behavior might occur'.format(location, name, remote_msg))
        elif percentage > 70:
            MonitoringController._logger.warning('OVS_WARNING: vDisk caching quota on {0} ALBA Backend {1} is at {2:.1f}%{3}'.format(location, name, percentage, remote_msg))
    MonitoringController._logger.info('Finished vDisk cache quota verification')
def gather_scrub_work():
    """
    Retrieve and execute scrub work
    :return: None
    """
    ScheduledTaskController._logger.info('Gather Scrub - Started')
    # Collect one SCRUB-role partition per reachable StorageRouter with running workers
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        sshclient = SSHClient(storage_driver.storagerouter)
                        # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                        if ServiceManager.get_service_status('workers', sshclient) is False:
                            ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip))
                        else:
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))
    if len(scrub_locations) == 0:
        raise RuntimeError('No scrub locations found')

    # Only BASE volumes are scrubbed (clones/templates excluded by object_type)
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] == 'BASE':
            vdisk_guids.add(vdisk.guid)
    if len(vdisk_guids) == 0:
        ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed'.format(len(vdisk_guids)))
        return
    ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_storage_router = None
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = {}           # ip -> celery AsyncResult for remote scrub tasks
    storage_router_list = []
    scrub_map = {}            # ip -> guids handed to that node (for rescheduling on failure)
    # Partition the guid set evenly across all scrub locations
    # (integer division is intentional here under Python 2 — slice bounds must be ints)
    for index, scrub_info in enumerate(scrub_locations.items()):
        start_index = index * len(vdisk_guids) / len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
        storage_router = scrub_info[0]
        # NOTE(review): relies on list(vdisk_guids) producing the same ordering on
        # every iteration (stable set iteration within one CPython process) — confirm
        vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))
        if local is True:
            # The local slice is executed in-process after the remote tasks are launched
            local_storage_router = storage_router
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
            storage_router_list.append(storage_router)
            scrub_map[storage_router.ip] = vdisk_guids_to_scrub

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    processed_guids = []
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        try:
            processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub)
        except Exception as ex:
            ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
    all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set, timesleep=60)  # Check every 60 seconds if tasks are still running
    for ip, result in all_results.iteritems():
        if isinstance(result, list):
            processed_guids.extend(result)
        else:
            ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

    # Reschedule slices from failed nodes: prefer another healthy remote node,
    # fall back to running them locally, otherwise give up for that slice
    result_set = {}
    for failed_node in failed_nodes:
        ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node))
        vdisk_guids_to_scrub = scrub_map[failed_node]
        rescheduled_work = False
        for storage_router, scrub_location in scrub_locations.items():
            if storage_router.ip not in failed_nodes:
                if storage_router.machine_id != local_machineid:
                    ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip))
                    result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location, vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                    storage_router_list.append(storage_router)
                    rescheduled_work = True
                    break
        if rescheduled_work is False:
            if local_scrub_location is not None:
                try:
                    processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location, vdisk_guids=vdisk_guids_to_scrub))
                except Exception as ex:
                    ScheduledTaskController._logger.error('Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex))
            else:
                ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node))
    # Wait for the rescheduled round, if any
    if len(result_set) > 0:
        all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set, timesleep=60)  # Check every 60 seconds if tasks are still running
        for ip, result in all_results2.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))
    # Every requested guid must be reported processed, and nothing unexpected
    if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
    ScheduledTaskController._logger.info('Gather Scrub - Finished')
def list(self):
    """
    Overview of all StorageDrivers
    """
    storagedrivers = StorageDriverList.get_storagedrivers()
    return storagedrivers
def add_vpool(parameters):
    """
    Add a vPool to the machine this task is running on
    :param parameters: dict with at least 'storagerouter_ip', 'vpool_name' and the
                       mountpoint_* keys used below; backend-specific keys depend on 'type'
    """
    # NOTE(review): the {} fallback is immediately followed by required-key lookups,
    # so calling with parameters=None still raises KeyError — confirm intent
    parameters = {} if parameters is None else parameters
    ip = parameters['storagerouter_ip']
    vpool_name = parameters['vpool_name']
    if StorageRouterController._validate_ip(ip) is False:
        raise ValueError('The entered ip address is invalid')
    if not re.match('^[0-9a-z]+(\-+[0-9a-z]+)*$', vpool_name):
        raise ValueError('Invalid vpool_name given')
    client = SSHClient.load(ip)  # Make sure to ALWAYS reload the client, as Fabric seems to be singleton-ish
    unique_id = System.get_my_machine_id(client)
    # Resolve the StorageRouter this task targets by ip + machine id
    storagerouter = None
    for current_storagerouter in StorageRouterList.get_storagerouters():
        if current_storagerouter.ip == ip and current_storagerouter.machine_id == unique_id:
            storagerouter = current_storagerouter
            break
    if storagerouter is None:
        raise RuntimeError('Could not find Storage Router with given ip address')
    vpool = VPoolList.get_vpool_by_name(vpool_name)
    storagedriver = None
    if vpool is not None:
        if vpool.backend_type.code == 'local':
            # Might be an issue, investigating whether it's on the same node or not
            if len(vpool.storagedrivers) == 1 and vpool.storagedrivers[0].storagerouter.machine_id != unique_id:
                raise RuntimeError('A local vPool with name {0} already exists'.format(vpool_name))
        for vpool_storagedriver in vpool.storagedrivers:
            if vpool_storagedriver.storagerouter_guid == storagerouter.guid:
                storagedriver = vpool_storagedriver  # The vPool is already added to this Storage Router and this might be a cleanup/recovery
        # Check whether there are running machines on this vPool
        machine_guids = []
        for vdisk in vpool.vdisks:
            if vdisk.vmachine_guid not in machine_guids:
                machine_guids.append(vdisk.vmachine_guid)
                if vdisk.vmachine.hypervisor_status in ['RUNNING', 'PAUSED']:
                    raise RuntimeError('At least one vMachine using this vPool is still running or paused. Make sure there are no active vMachines')

    nodes = {ip}  # Set comprehension
    if vpool is not None:
        # All nodes already serving this vPool must have their services cycled too
        for vpool_storagedriver in vpool.storagedrivers:
            nodes.add(vpool_storagedriver.storagerouter.ip)
    nodes = list(nodes)
    services = ['volumedriver_{0}'.format(vpool_name),
                'failovercache_{0}'.format(vpool_name)]
    # Stop services
    for node in nodes:
        node_client = SSHClient.load(node)
        for service in services:
            System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.disable_service('{0}')
""".format(service))
            System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.stop_service('{0}')
""".format(service))

    # Keep in mind that if the Storage Driver exists, the vPool does as well
    client = SSHClient.load(ip)
    mountpoint_bfs = ''
    directories_to_create = []
    if vpool is None:
        # First time this vPool is created: build backend-specific metadata
        vpool = VPool()
        supported_backends = System.read_remote_config(client, 'volumedriver.supported.backends').split(',')
        if 'rest' in supported_backends:
            supported_backends.remove('rest')  # REST is not supported for now
        backend_type = BackendTypeList.get_backend_type_by_code(parameters['type'])
        vpool.backend_type = backend_type
        connection_host = connection_port = connection_username = connection_password = None
        if vpool.backend_type.code in ['local', 'distributed']:
            vpool.metadata = {'backend_type': 'LOCAL'}
            mountpoint_bfs = parameters['mountpoint_bfs']
            directories_to_create.append(mountpoint_bfs)
            vpool.metadata['local_connection_path'] = mountpoint_bfs
        if vpool.backend_type.code == 'rest':
            connection_host = parameters['connection_host']
            connection_port = parameters['connection_port']
            rest_connection_timeout_secs = parameters['connection_timeout']
            # NOTE(review): the timeout value is stored under the *verbose_logging*
            # key — looks like a copy/paste slip; confirm against the consumer side
            vpool.metadata = {'rest_connection_host': connection_host,
                              'rest_connection_port': connection_port,
                              'buchla_connection_log_level': "0",
                              'rest_connection_verbose_logging': rest_connection_timeout_secs,
                              'rest_connection_metadata_format': "JSON",
                              'backend_type': 'REST'}
        elif vpool.backend_type.code in ('ceph_s3', 'amazon_s3', 'swift_s3'):
            connection_host = parameters['connection_host']
            connection_port = parameters['connection_port']
            connection_username = parameters['connection_username']
            connection_password = parameters['connection_password']
            if vpool.backend_type.code in ['swift_s3']:
                strict_consistency = 'false'
                s3_connection_flavour = 'SWIFT'
            else:
                strict_consistency = 'true'
                s3_connection_flavour = 'S3'
            vpool.metadata = {'s3_connection_host': connection_host,
                              's3_connection_port': connection_port,
                              's3_connection_username': connection_username,
                              's3_connection_password': connection_password,
                              's3_connection_flavour': s3_connection_flavour,
                              's3_connection_strict_consistency': strict_consistency,
                              's3_connection_verbose_logging': 1,
                              'backend_type': 'S3'}
        vpool.name = vpool_name
        vpool.description = "{} {}".format(vpool.backend_type.code, vpool_name)
        vpool.login = connection_username
        vpool.password = connection_password
        if not connection_host:
            vpool.connection = None
        else:
            vpool.connection = '{}:{}'.format(connection_host, connection_port)
        vpool.save()

    # Connection information is Storage Driver related information
    new_storagedriver = False
    if storagedriver is None:
        storagedriver = StorageDriver()
        new_storagedriver = True

    mountpoint_temp = parameters['mountpoint_temp']
    mountpoint_md = parameters['mountpoint_md']
    mountpoint_readcache1 = parameters['mountpoint_readcache1']
    mountpoint_readcache2 = parameters.get('mountpoint_readcache2', '')
    mountpoint_writecache = parameters['mountpoint_writecache']
    mountpoint_foc = parameters['mountpoint_foc']
    directories_to_create.append(mountpoint_temp)
    directories_to_create.append(mountpoint_md)
    directories_to_create.append(mountpoint_readcache1)
    if mountpoint_readcache2:
        directories_to_create.append(mountpoint_readcache2)
    directories_to_create.append(mountpoint_writecache)
    directories_to_create.append(mountpoint_foc)

    client = SSHClient.load(ip)
    dir_create_script = """
import os
for directory in {0}:
    if not os.path.exists(directory):
        os.makedirs(directory)
""".format(directories_to_create)
    System.exec_remote_python(client, dir_create_script)

    # Filesystem statistics are taken locally; mountpoints are assumed mounted here
    read_cache1_fs = os.statvfs(mountpoint_readcache1)
    read_cache2_fs = None
    if mountpoint_readcache2:
        read_cache2_fs = os.statvfs(mountpoint_readcache2)
    write_cache_fs = os.statvfs(mountpoint_writecache)
    fdcache = '{}/fd_{}'.format(mountpoint_writecache, vpool_name)
    scocache = '{}/sco_{}'.format(mountpoint_writecache, vpool_name)
    readcache1 = '{}/read1_{}'.format(mountpoint_readcache1, vpool_name)
    files2create = [readcache1]
    if mountpoint_readcache2 and mountpoint_readcache1 != mountpoint_readcache2:
        readcache2 = '{}/read2_{}'.format(mountpoint_readcache2, vpool_name)
        files2create.append(readcache2)
    else:
        readcache2 = ''
    failovercache = '{}/foc_{}'.format(mountpoint_foc, vpool_name)
    metadatapath = '{}/metadata_{}'.format(mountpoint_md, vpool_name)
    tlogpath = '{}/tlogs_{}'.format(mountpoint_md, vpool_name)
    rsppath = '/var/rsp/{}'.format(vpool_name)
    dirs2create = [scocache, failovercache, metadatapath, tlogpath, rsppath,
                   System.read_remote_config(client, 'volumedriver.readcache.serialization.path')]

    cmd = "cat /etc/mtab | grep ^/dev/ | cut -d ' ' -f 2"
    mountpoints = [device.strip() for device in client.run(cmd).strip().split('\n')]
    mountpoints.remove('/')

    def is_partition(directory):
        # True when the directory is itself a mounted device (not a plain directory)
        for mountpoint in mountpoints:
            if directory == mountpoint:
                return True
        return False
    # Cache sizes
    # 20% = scocache
    # 20% = failovercache (@TODO: check if this can possibly consume more than 20%)
    # 60% = readcache
    # safety values:
    readcache1_factor = 0.2
    readcache2_factor = 0.2
    writecache_factor = 0.1
    # The factor tables below depend on how many distinct partitions back the caches;
    # '/dummy' stands in for "plain directory on the root filesystem"
    if (mountpoint_readcache1 == mountpoint_readcache2) or not mountpoint_readcache2:
        delta = set()
        delta.add(mountpoint_readcache1 if is_partition(mountpoint_readcache1) else '/dummy')
        delta.add(mountpoint_writecache if is_partition(mountpoint_writecache) else '/dummy')
        delta.add(mountpoint_foc if is_partition(mountpoint_foc) else '/dummy')
        if len(delta) == 1:
            readcache1_factor = 0.49
            writecache_factor = 0.2
        elif len(delta) == 2:
            if mountpoint_writecache == mountpoint_foc:
                readcache1_factor = 0.98
                writecache_factor = 0.49
            else:
                readcache1_factor = 0.49
                if mountpoint_readcache1 == mountpoint_writecache:
                    writecache_factor = 0.49
                else:
                    writecache_factor = 0.98
        elif len(delta) == 3:
            readcache1_factor = 0.98
            writecache_factor = 0.98
    else:
        delta = set()
        delta.add(mountpoint_readcache1 if is_partition(mountpoint_readcache1) else '/dummy')
        delta.add(mountpoint_readcache2 if is_partition(mountpoint_readcache2) else '/dummy')
        delta.add(mountpoint_writecache if is_partition(mountpoint_writecache) else '/dummy')
        delta.add(mountpoint_foc if is_partition(mountpoint_foc) else '/dummy')
        if len(delta) == 1:
            # consider them all to be directories
            readcache1_factor = 0.24
            readcache2_factor = 0.24
            writecache_factor = 0.24
        elif len(delta) == 2:
            if mountpoint_writecache == mountpoint_foc:
                writecache_factor = 0.24
                if mountpoint_readcache1 == mountpoint_writecache:
                    readcache1_factor = 0.49
                    readcache2_factor = 0.98
                else:
                    readcache1_factor = 0.98
                    readcache2_factor = 0.49
            else:
                readcache1_factor = readcache2_factor = 0.49
                writecache_factor = 0.49
        elif len(delta) == 3:
            if mountpoint_writecache == mountpoint_foc:
                readcache1_factor = 0.98
                readcache2_factor = 0.98
                writecache_factor = 0.49
            elif mountpoint_readcache1 == mountpoint_writecache:
                readcache1_factor = 0.49
                readcache2_factor = 0.98
                writecache_factor = 0.49
            elif mountpoint_readcache1 == mountpoint_foc:
                readcache1_factor = 0.49
                readcache2_factor = 0.98
                writecache_factor = 0.98
            elif mountpoint_readcache2 == mountpoint_writecache:
                readcache1_factor = 0.98
                readcache2_factor = 0.49
                writecache_factor = 0.49
            elif mountpoint_readcache2 == mountpoint_foc:
                readcache1_factor = 0.98
                readcache2_factor = 0.49
                writecache_factor = 0.98
        elif len(delta) == 4:
            readcache1_factor = 0.98
            readcache2_factor = 0.98
            writecache_factor = 0.98

    # summarize caching on root partition (directory only)
    root_assigned = dict()
    if not is_partition(mountpoint_readcache1):
        root_assigned['readcache1_factor'] = readcache1_factor
    if not is_partition(mountpoint_readcache2):
        root_assigned['readcache2_factor'] = readcache2_factor
    if not is_partition(mountpoint_writecache):
        root_assigned['writecache_factor'] = writecache_factor
    if not is_partition(mountpoint_foc):
        root_assigned['foc_factor'] = min(readcache1_factor, readcache2_factor, writecache_factor)

    # always leave at least 20% of free space
    division_factor = 1.0
    total_size = sum(root_assigned.values()) + .02 * len(root_assigned)
    # NOTE(review): the boundaries 1.6 and 3.2 are excluded by both neighbouring
    # conditions (0.8 < x < 1.6 / 1.6 < x < 3.2), so an exact hit keeps factor 1.0 — confirm
    if 0.8 < total_size < 1.6:
        division_factor = 2.0
    elif 1.6 < total_size < 3.2:
        division_factor = 4.0
    elif total_size >= 3.2:
        division_factor = 8.0
    if 'readcache1_factor' in root_assigned.keys():
        readcache1_factor /= division_factor
    if 'readcache2_factor' in root_assigned.keys():
        readcache2_factor /= division_factor
    if 'writecache_factor' in root_assigned.keys():
        writecache_factor /= division_factor

    # Sizes are expressed in KiB, rounded down to a 4096-block multiple (f_bavail is in 4K blocks)
    scocache_size = '{0}KiB'.format((int(write_cache_fs.f_bavail * writecache_factor / 4096) * 4096) * 4)
    if (mountpoint_readcache1 and not mountpoint_readcache2) or (mountpoint_readcache1 == mountpoint_readcache2):
        mountpoint_readcache2 = ''
        readcache1_size = '{0}KiB'.format((int(read_cache1_fs.f_bavail * readcache1_factor / 4096) * 4096) * 4)
        readcache2 = ''
        readcache2_size = '0KiB'
    else:
        readcache1_size = '{0}KiB'.format((int(read_cache1_fs.f_bavail * readcache1_factor / 4096) * 4096) * 4)
        readcache2_size = '{0}KiB'.format((int(read_cache2_fs.f_bavail * readcache2_factor / 4096) * 4096) * 4)

    if new_storagedriver:
        # Pick 3 free ports, avoiding both OS-level used ports and modeled ports
        ports_in_use = System.ports_in_use(client)
        ports_reserved = []
        ports_in_use_model = {}
        for port_storagedriver in StorageDriverList.get_storagedrivers():
            if port_storagedriver.vpool_guid not in ports_in_use_model:
                ports_in_use_model[port_storagedriver.vpool_guid] = port_storagedriver.ports
                ports_reserved += port_storagedriver.ports
        if vpool.guid in ports_in_use_model:  # The vPool is extended to another StorageRouter. We need to use these ports.
            ports = ports_in_use_model[vpool.guid]
            if any(port in ports_in_use for port in ports):
                raise RuntimeError('The required ports are in use')
        else:  # First StorageDriver for this vPool, so generating new ports
            ports = []
            for port_range in System.read_remote_config(client, 'volumedriver.filesystem.ports').split(','):
                port_range = port_range.strip()
                if '-' in port_range:
                    current_range = (int(port_range.split('-')[0]), int(port_range.split('-')[1]))
                else:
                    current_range = (int(port_range), 65536)
                current_port = current_range[0]
                while len(ports) < 3:
                    if current_port not in ports_in_use and current_port not in ports_reserved:
                        ports.append(current_port)
                    current_port += 1
                    if current_port > current_range[1]:
                        break
            if len(ports) != 3:
                raise RuntimeError('Could not find enough free ports')
    else:
        ports = storagedriver.ports

    # Determine the storage ip: loopback for KVM, user-supplied otherwise
    ip_path = Configuration.get('ovs.core.ip.path')
    if ip_path is None:
        ip_path = "`which ip`"
    cmd = "{0} a | grep 'inet ' | sed 's/\s\s*/ /g' | cut -d ' ' -f 3 | cut -d '/' -f 1".format(ip_path)
    ipaddresses = client.run(cmd).strip().split('\n')
    ipaddresses = [ipaddr.strip() for ipaddr in ipaddresses]
    grid_ip = System.read_remote_config(client, 'ovs.grid.ip')
    if grid_ip in ipaddresses:
        ipaddresses.remove(grid_ip)
    if not ipaddresses:
        raise RuntimeError('No available ip addresses found suitable for Storage Router storage ip')
    if storagerouter.pmachine.hvtype == 'KVM':
        volumedriver_storageip = '127.0.0.1'
    else:
        volumedriver_storageip = parameters['storage_ip']
    vrouter_id = '{0}{1}'.format(vpool_name, unique_id)

    vrouter_config = {'vrouter_id': vrouter_id,
                      'vrouter_redirect_timeout_ms': '5000',
                      'vrouter_routing_retries': 10,
                      'vrouter_volume_read_threshold': 1024,
                      'vrouter_volume_write_threshold': 1024,
                      'vrouter_file_read_threshold': 1024,
                      'vrouter_file_write_threshold': 1024,
                      'vrouter_min_workers': 4,
                      'vrouter_max_workers': 16}

    # Register this node in the volumedriver cluster registry (Arakoon-backed)
    voldrv_arakoon_cluster_id = str(System.read_remote_config(client, 'volumedriver.arakoon.clusterid'))
    voldrv_arakoon_cluster = ArakoonManagementEx().getCluster(voldrv_arakoon_cluster_id)
    voldrv_arakoon_client_config = voldrv_arakoon_cluster.getClientConfig()
    arakoon_node_configs = []
    for arakoon_node in voldrv_arakoon_client_config.keys():
        arakoon_node_configs.append(ArakoonNodeConfig(arakoon_node,
                                                      voldrv_arakoon_client_config[arakoon_node][0][0],
                                                      voldrv_arakoon_client_config[arakoon_node][1]))
    vrouter_clusterregistry = ClusterRegistry(str(vpool.guid), voldrv_arakoon_cluster_id, arakoon_node_configs)
    node_configs = []
    for existing_storagedriver in StorageDriverList.get_storagedrivers():
        if existing_storagedriver.vpool_guid == vpool.guid:
            node_configs.append(ClusterNodeConfig(str(existing_storagedriver.storagedriver_id),
                                                  str(existing_storagedriver.cluster_ip),
                                                  existing_storagedriver.ports[0],
                                                  existing_storagedriver.ports[1],
                                                  existing_storagedriver.ports[2]))
    if new_storagedriver:
        node_configs.append(ClusterNodeConfig(vrouter_id, grid_ip, ports[0], ports[1], ports[2]))
    vrouter_clusterregistry.set_node_configs(node_configs)

    readcaches = [{'path': readcache1, 'size': readcache1_size}]
    if readcache2:
        readcaches.append({'path': readcache2, 'size': readcache2_size})
    scocaches = [{'path': scocache, 'size': scocache_size}]
    filesystem_config = {'fs_backend_path': mountpoint_bfs}
    volumemanager_config = {'metadata_path': metadatapath, 'tlog_path': tlogpath}
    # Remote script: write the complete storagedriver json config on the target node
    storagedriver_config_script = """
from ovs.plugin.provider.configuration import Configuration
from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
fd_config = {{'fd_cache_path': '{11}',
              'fd_extent_cache_capacity': '1024',
              'fd_namespace' : 'fd-{0}-{12}'}}
storagedriver_configuration = StorageDriverConfiguration('{0}')
storagedriver_configuration.configure_backend({1})
storagedriver_configuration.configure_readcache({2}, Configuration.get('volumedriver.readcache.serialization.path') + '/{0}')
storagedriver_configuration.configure_scocache({3}, '1GB', '2GB')
storagedriver_configuration.configure_failovercache('{4}')
storagedriver_configuration.configure_filesystem({5})
storagedriver_configuration.configure_volumemanager({6})
storagedriver_configuration.configure_volumerouter('{12}', {7})
storagedriver_configuration.configure_arakoon_cluster('{8}', {9})
storagedriver_configuration.configure_hypervisor('{10}')
storagedriver_configuration.configure_filedriver(fd_config)
""".format(vpool_name, vpool.metadata, readcaches, scocaches, failovercache, filesystem_config,
           volumemanager_config, vrouter_config, voldrv_arakoon_cluster_id, voldrv_arakoon_client_config,
           storagerouter.pmachine.hvtype, fdcache, vpool.guid)
    System.exec_remote_python(client, storagedriver_config_script)

    # Remote script: point the event publisher of this vPool's config at RabbitMQ
    remote_script = """
import os
from configobj import ConfigObj
from ovs.plugin.provider.configuration import Configuration
protocol = Configuration.get('ovs.core.broker.protocol')
login = Configuration.get('ovs.core.broker.login')
password = Configuration.get('ovs.core.broker.password')
vpool_name = {0}
uris = []
cfg = ConfigObj('/opt/OpenvStorage/config/rabbitmqclient.cfg')
main_section = cfg.get('main')
nodes = main_section['nodes'] if type(main_section['nodes']) == list else [main_section['nodes']]
for node in nodes:
    uris.append({{'amqp_uri': '{{0}}://{{1}}:{{2}}@{{3}}'.format(protocol, login, password, cfg.get(node)['location'])}})
from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
queue_config = {{'events_amqp_routing_key': Configuration.get('ovs.core.broker.volumerouter.queue'),
                 'events_amqp_uris': uris}}
for config_file in os.listdir('/opt/OpenvStorage/config/voldrv_vpools'):
    this_vpool_name = config_file.replace('.json', '')
    if config_file.endswith('.json') and (vpool_name is None or vpool_name == this_vpool_name):
        storagedriver_configuration = StorageDriverConfiguration(this_vpool_name)
        storagedriver_configuration.configure_event_publisher(queue_config)
""".format(vpool_name if vpool_name is None else "'{0}'".format(vpool_name))
    System.exec_remote_python(client, remote_script)

    # Updating the model
    storagedriver.storagedriver_id = vrouter_id
    storagedriver.name = vrouter_id.replace('_', ' ')
    storagedriver.description = storagedriver.name
    storagedriver.storage_ip = volumedriver_storageip
    storagedriver.cluster_ip = grid_ip
    storagedriver.ports = ports
    storagedriver.mountpoint = '/mnt/{0}'.format(vpool_name)
    storagedriver.mountpoint_temp = mountpoint_temp
    storagedriver.mountpoint_readcache1 = mountpoint_readcache1
    storagedriver.mountpoint_readcache2 = mountpoint_readcache2
    storagedriver.mountpoint_writecache = mountpoint_writecache
    storagedriver.mountpoint_foc = mountpoint_foc
    storagedriver.mountpoint_bfs = mountpoint_bfs
    storagedriver.mountpoint_md = mountpoint_md
    storagedriver.storagerouter = storagerouter
    storagedriver.vpool = vpool
    storagedriver.save()

    dirs2create.append(storagedriver.mountpoint)
    # NOTE(review): the next two appends create the fd dir twice, the first with a
    # double slash ('<writecache>//fd_<name>') — likely only the second was intended
    dirs2create.append(mountpoint_writecache + '/' + '/fd_' + vpool_name)
    dirs2create.append('{0}/fd_{1}'.format(mountpoint_writecache, vpool_name))

    file_create_script = """
import os
for directory in {0}:
    if not os.path.exists(directory):
        os.makedirs(directory)
for filename in {1}:
    if not os.path.exists(filename):
        open(filename, 'a').close()
""".format(dirs2create, files2create)
    System.exec_remote_python(client, file_create_script)

    # Define the volumedriver and failovercache services on the target node
    voldrv_config_file = '{0}/voldrv_vpools/{1}.json'.format(System.read_remote_config(client, 'ovs.core.cfgdir'), vpool_name)
    log_file = '/var/log/ovs/volumedriver/{0}.log'.format(vpool_name)
    vd_cmd = '/usr/bin/volumedriver_fs -f --config-file={0} --mountpoint {1} --logrotation --logfile {2} -o big_writes -o sync_read -o allow_other'.format(
        voldrv_config_file, storagedriver.mountpoint, log_file)
    if storagerouter.pmachine.hvtype == 'KVM':
        vd_stopcmd = 'umount {0}'.format(storagedriver.mountpoint)
    else:
        vd_stopcmd = 'exportfs -u *:{0}; umount {0}'.format(storagedriver.mountpoint)
    vd_name = 'volumedriver_{}'.format(vpool_name)

    log_file = '/var/log/ovs/volumedriver/foc_{0}.log'.format(vpool_name)
    fc_cmd = '/usr/bin/failovercachehelper --config-file={0} --logfile={1}'.format(voldrv_config_file, log_file)
    fc_name = 'failovercache_{0}'.format(vpool_name)

    params = {'<VPOOL_MOUNTPOINT>': storagedriver.mountpoint,
              '<HYPERVISOR_TYPE>': storagerouter.pmachine.hvtype,
              '<VPOOL_NAME>': vpool_name,
              '<UUID>': str(uuid.uuid4())}
    # Copy the per-vPool service templates (upstart on Ubuntu, systemd otherwise)
    if Osdist.is_ubuntu(client):
        if client.file_exists('/opt/OpenvStorage/config/templates/upstart/ovs-volumedriver.conf'):
            client.run('cp -f /opt/OpenvStorage/config/templates/upstart/ovs-volumedriver.conf /opt/OpenvStorage/config/templates/upstart/ovs-volumedriver_{0}.conf'.format(vpool_name))
            client.run('cp -f /opt/OpenvStorage/config/templates/upstart/ovs-failovercache.conf /opt/OpenvStorage/config/templates/upstart/ovs-failovercache_{0}.conf'.format(vpool_name))
    else:
        if client.file_exists('/opt/OpenvStorage/config/templates/systemd/ovs-volumedriver.service'):
            client.run('cp -f /opt/OpenvStorage/config/templates/systemd/ovs-volumedriver.service /opt/OpenvStorage/config/templates/systemd/ovs-volumedriver_{0}.service'.format(vpool_name))
            client.run('cp -f /opt/OpenvStorage/config/templates/systemd/ovs-failovercache.service /opt/OpenvStorage/config/templates/systemd/ovs-failovercache_{0}.service'.format(vpool_name))

    service_script = """
from ovs.plugin.provider.service import Service
Service.add_service(package=('openvstorage', 'volumedriver'), name='{0}', command='{1}', stop_command='{2}', params={5})
Service.add_service(package=('openvstorage', 'failovercache'), name='{3}', command='{4}', stop_command=None, params={5})
""".format(vd_name, vd_cmd, vd_stopcmd, fc_name, fc_cmd, params)
    System.exec_remote_python(client, service_script)

    # Hypervisor-specific plumbing: NFS export for VMware, libvirt storage pool for KVM
    if storagerouter.pmachine.hvtype == 'VMWARE':
        client.run("grep -q '/tmp localhost(ro,no_subtree_check)' /etc/exports || echo '/tmp localhost(ro,no_subtree_check)' >> /etc/exports")
        if Osdist.is_ubuntu(client):
            client.run('service nfs-kernel-server start')
        else:
            client.run('service nfs start')

    if storagerouter.pmachine.hvtype == 'KVM':
        client.run('virsh pool-define-as {0} dir - - - - {1}'.format(vpool_name, storagedriver.mountpoint))
        client.run('virsh pool-build {0}'.format(vpool_name))
        client.run('virsh pool-start {0}'.format(vpool_name))
        client.run('virsh pool-autostart {0}'.format(vpool_name))

    # Start services
    for node in nodes:
        node_client = SSHClient.load(node)
        for service in services:
            System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
Service.enable_service('{0}')
""".format(service))
            System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
Service.start_service('{0}')
""".format(service))

    # Fill vPool size
    vfs_info = os.statvfs('/mnt/{0}'.format(vpool_name))
    vpool.size = vfs_info.f_blocks * vfs_info.f_bsize
    vpool.save()

    # Configure Cinder
    ovsdb = PersistentFactory.get_client()
    vpool_config_key = str('ovs_openstack_cinder_%s' % storagedriver.vpool_guid)
    if ovsdb.exists(vpool_config_key):
        # Second node gets values saved by first node
        cinder_password, cinder_user, tenant_name, controller_ip, config_cinder = ovsdb.get(vpool_config_key)
    else:
        config_cinder = parameters.get('config_cinder', False)
        cinder_password = ''
        cinder_user = ''
        tenant_name = ''
        controller_ip = ''
    if config_cinder:
        cinder_password = parameters.get('cinder_pass', cinder_password)
        cinder_user = parameters.get('cinder_user', cinder_user)
        tenant_name = parameters.get('cinder_tenant', tenant_name)
        controller_ip = parameters.get('cinder_controller', controller_ip)  # Keystone host
    if cinder_password:
        osc = OpenStackCinder(cinder_password = cinder_password, cinder_user = cinder_user,
                              tenant_name = tenant_name, controller_ip = controller_ip)
        osc.configure_vpool(vpool_name, storagedriver.mountpoint)
        # Save values for first node to use
        ovsdb.set(vpool_config_key, [cinder_password, cinder_user, tenant_name, controller_ip, config_cinder])
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed.
    This code will typically contain:
    * "dangerous" migration code (it needs certain running services)
    * Migration code depending on a cluster-wide state
    * ...
    * Successfully finishing a piece of migration code, should create an entry in /ovs/framework/migration in case it should not be executed again
      Eg: /ovs/framework/migration|stats_monkey_integration: True
    :return: None
    :rtype: NoneType
    """
    MigrationController._logger.info('Preparing out of band migrations...')

    # Imports are done locally (not at module level) so this migration code only pulls in
    # its dependencies when it actually runs.
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.extensions.db.arakooninstaller import ArakoonInstaller
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs_extensions.generic.toolbox import ExtensionsToolbox
    from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator
    from ovs.extensions.packages.packagefactory import PackageFactory
    from ovs_extensions.services.interfaces.systemd import Systemd
    from ovs.extensions.services.servicefactory import ServiceFactory
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller

    MigrationController._logger.info('Start out of band migrations...')
    service_manager = ServiceFactory.get_manager()

    # Build one SSH client per StorageRouter up front; every migration step below reuses this map.
    sr_client_map = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip,  # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter
                                                      username='******')  # NOTE(review): username looks redacted in this copy — verify against the original file

    #########################################################
    # Addition of 'ExecReload' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                # Skip services whose unit file already contains an ExecReload directive
                if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue
                try:
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except:  # NOTE(review): bare except — swallows everything incl. KeyboardInterrupt; logged, but consider 'except Exception'
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    ##################################################################
    # Adjustment of open file descriptors for Arakoon services to 8192
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-arakoon-'):
                continue
            # Unit file location and the marker line differ between systemd and upstart
            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'LimitNOFILE=8192'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'limit nofile 8192 8192'
            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue
            try:
                service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                changed_clients.add(root_client)
                # Bump the version file so the update mechanism schedules a service restart
                ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
            except:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename alba_proxy service in model: 'albaproxy_<vpool>' becomes 'albaproxy_<vpool>_0'
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()

            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='alba',
                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name),
                                                new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name))

            # Register new service and remove old service
            service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                        client=root_client,
                                        params=Configuration.get(old_configuration_key),
                                        target_name='ovs-{0}'.format(new_service_name))

            # Update scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}])
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']:
                        proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info
                        Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config)

        # Update 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            # Convert a single-proxy config into the 'MULTI' layout: one numbered sub-config per proxy
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                # Hoist all 'backend_interface*' keys out of the numbered sub-config to the top level
                # noinspection PyUnresolvedReferences
                for key, value in backend_connection_manager[str(index)].items():
                    if key.startswith('backend_interface'):
                        backend_connection_manager[key] = value
                        # noinspection PyUnresolvedReferences
                        del backend_connection_manager[str(index)][key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    backend_connection_manager[key] = value
        else:
            # Already MULTI: make sure 'backend_interface*' keys live at the top level, not in sub-configs
            backend_connection_manager = current_config
            for value in backend_connection_manager.values():
                if isinstance(value, dict):
                    for key, val in value.items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    changes = True
                    backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='volumedriver',
                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
            if service_manager.__class__ == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    vpools = VPoolList.get_vpools()
    for vpool in vpools:
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                if bits is None:
                    # Extract the value once from the actual service file; fall back to 5 when unparsable
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                        client=sr_client_map[storagedriver.storagerouter_guid],
                                                                        entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except:
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))
        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    #####################################
    # Update the vPool metadata structure
    def _update_metadata_structure(metadata):
        # Convert the old flat vPool metadata into the new per-StorageRouter 'caching_info' layout.
        # Works on (and returns) a deep copy; the caller decides whether to persist it.
        metadata = copy.deepcopy(metadata)
        cache_structure = {'read': False,
                           'write': False,
                           'is_backend': False,
                           'quota': None,
                           'backend_info': {'name': None,  # Will be filled in when is_backend is true
                                            'backend_guid': None,
                                            'alba_backend_guid': None,
                                            'policies': None,
                                            'preset': None,
                                            'arakoon_config': None,
                                            'connection_info': {'client_id': None,
                                                                'client_secret': None,
                                                                'host': None,
                                                                'port': None,
                                                                'local': None}}
                           }
        # Maps each cache type to the old metadata keys its values come from
        structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read',
                                                                  'write': 'block_cache_on_write',
                                                                  'quota': 'quota_bc',
                                                                  'backend_prefix': 'backend_bc_{0}'},
                         StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read',
                                                                     'write': 'fragment_cache_on_write',
                                                                     'quota': 'quota_fc',
                                                                     'backend_prefix': 'backend_aa_{0}'}}
        if 'arakoon_config' in metadata['backend']:  # Arakoon config should be placed under the backend info
            metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config')
        if 'connection_info' in metadata['backend']:  # Connection info should be placed under the backend info
            metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info')
        if 'caching_info' not in metadata:  # Caching info is the new key
            would_be_caching_info = {}
            metadata['caching_info'] = would_be_caching_info
            # Extract all caching data for every storagerouter
            current_caching_info = metadata['backend'].pop('caching_info')  # Pop to mutate metadata
            for storagerouter_guid in current_caching_info.iterkeys():
                current_cache_data = current_caching_info[storagerouter_guid]
                storagerouter_caching_info = {}
                would_be_caching_info[storagerouter_guid] = storagerouter_caching_info
                for cache_type, cache_type_mapping in structure_map.iteritems():
                    new_cache_structure = copy.deepcopy(cache_structure)
                    storagerouter_caching_info[cache_type] = new_cache_structure
                    for new_structure_key, old_structure_key in cache_type_mapping.iteritems():
                        if new_structure_key == 'backend_prefix':
                            # Get possible backend related info
                            metadata_key = old_structure_key.format(storagerouter_guid)
                            if metadata_key not in metadata:
                                continue
                            backend_data = metadata.pop(metadata_key)  # Pop to mutate metadata
                            new_cache_structure['is_backend'] = True
                            # Copy over the old data
                            new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config']
                            new_cache_structure['backend_info'].update(backend_data['backend_info'])
                            new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info'])
                        else:
                            new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key)
        return metadata

    vpools = VPoolList.get_vpools()
    for vpool in vpools:
        try:
            new_metadata = _update_metadata_structure(vpool.metadata)
            vpool.metadata = new_metadata
            vpool.save()
        except KeyError:
            MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name))

    ##############################################
    # Always use indent=4 during Configuration set
    def _resave_all_config_entries(config_path='/ovs'):
        """
        Recursive functions which checks every config management key if its a directory or not.
        If not a directory, we retrieve the config and just save it again using the new indentation logic
        """
        for item in Configuration.list(config_path):
            new_path = config_path + '/' + item
            print new_path
            if Configuration.dir_exists(new_path) is True:
                _resave_all_config_entries(config_path=new_path)
            else:
                try:
                    _config = Configuration.get(new_path)
                    Configuration.set(new_path, _config)
                except:
                    # Entry is not valid JSON — round-trip it as raw text instead
                    _config = Configuration.get(new_path, raw=True)
                    Configuration.set(new_path, _config, raw=True)
    if ExtensionMigrator.THIS_VERSION <= 13:  # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower
        MigrationController._logger.info('Re-saving every configuration setting with new indentation rules')
        _resave_all_config_entries()

    ############################
    # Update some default values
    def _update_manifest_cache_size(_proxy_config_key):
        # Raise the (block/fragment) manifest cache sizes in the given proxy config to 500 MiB.
        # Returns True when the stored config was changed (callers use this to schedule restarts).
        updated = False
        manifest_cache_size = 500 * 1024 * 1024
        if Configuration.exists(key=_proxy_config_key):
            _proxy_config = Configuration.get(key=_proxy_config_key)
            for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                    if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                        updated = True
                        _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
            if _proxy_config['manifest_cache_size'] != manifest_cache_size:
                updated = True
                _proxy_config['manifest_cache_size'] = manifest_cache_size
            if updated is True:
                Configuration.set(key=_proxy_config_key, value=_proxy_config)
        return updated

    for storagedriver in StorageDriverList.get_storagedrivers():
        try:
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))  # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services
            for alba_proxy in storagedriver.alba_proxies:
                if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True:
                    # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='alba',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name))

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            for key, value in current_config.iteritems():
                if key.isdigit() is True:  # Numbered keys are the per-proxy sub-configs
                    if value.get('alba_connection_asd_connection_pool_capacity') != 10:
                        changes = True
                        value['alba_connection_asd_connection_pool_capacity'] = 10
                    if value.get('alba_connection_timeout') != 30:
                        changes = True
                        value['alba_connection_timeout'] = 30
                    if value.get('alba_connection_rora_manifest_cache_capacity') != 25000:
                        changes = True
                        value['alba_connection_rora_manifest_cache_capacity'] = 25000

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**current_config)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
        except Exception:
            MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id))

    ####################################################
    # Adding proxy fail fast as env variable for proxies
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-albaproxy_'):
                continue
            # Unit file location and the marker line differ between systemd and upstart
            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'Environment=ALBA_FAIL_FAST=true'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'env ALBA_FAIL_FAST=true'
            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue
            try:
                service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client, package_name='alba', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
            except:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    ######################################
    # Integration of stats monkey (2.10.2)
    if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False:
        try:
            # Get content of old key into new key
            old_stats_monkey_key = '/statsmonkey/statsmonkey'
            if Configuration.exists(key=old_stats_monkey_key) is True:
                Configuration.set(key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key))
                Configuration.delete(key=old_stats_monkey_key)

            # Make sure to disable the stats monkey by default or take over the current schedule if it was configured manually before
            celery_key = '/ovs/framework/scheduling/celery'
            current_value = None
            scheduling_config = Configuration.get(key=celery_key, default={})
            if 'statsmonkey.run_all_stats' in scheduling_config:  # Old celery task name of the stats monkey
                current_value = scheduling_config.pop('statsmonkey.run_all_stats')
            # current_value stays None (= disabled) when no manual schedule existed
            scheduling_config['ovs.stats_monkey.run_all'] = current_value
            scheduling_config['alba.stats_monkey.run_all'] = current_value
            Configuration.set(key=celery_key, value=scheduling_config)

            support_key = '/ovs/framework/support'
            support_config = Configuration.get(key=support_key)
            support_config['support_agent'] = support_config.pop('enabled', True)
            support_config['remote_access'] = support_config.pop('enablesupport', False)
            Configuration.set(key=support_key, value=support_config)

            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True)
        except Exception:
            MigrationController._logger.exception('Integration of stats monkey failed')

    ######################################################
    # Write away cluster ID to a file for back-up purposes
    try:
        cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None)
        with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file:
            config = json.load(config_file)
        if cluster_id is not None and config.get('cluster_id', None) is None:
            config['cluster_id'] = cluster_id
            with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file:
                json.dump(config, config_file, indent=4)
    except Exception:
        MigrationController._logger.exception('Writing cluster id to a file failed.')

    #########################################################
    # Additional string formatting in Arakoon services (2.11)
    try:
        if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False:
            arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon')]
            for storagerouter in StorageRouterList.get_masters():
                for service_name in arakoon_service_names:
                    config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                    if Configuration.exists(key=config_key):
                        config = Configuration.get(key=config_key)
                        config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                        config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON
                        config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON
                        Configuration.set(key=config_key, value=config)
            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True)
    except Exception:
        MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

    ############################################################
    # Additional string formatting in ALBA proxy services (2.11)
    # NOTE(review): 'changed_clients' initialized here is also filled by the two following
    # sections and only drained (daemon-reload) after the version-file section below.
    changed_clients = set()
    try:
        if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False:
            alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
            for service in ServiceTypeList.get_by_name('AlbaProxy').services:
                root_client = sr_client_map[service.storagerouter_guid]
                config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name)
                if Configuration.exists(key=config_key):
                    config = Configuration.get(key=config_key)
                    config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                    config['ALBA_PKG_NAME'] = alba_pkg_name
                    config['ALBA_VERSION_CMD'] = alba_version_cmd
                    Configuration.set(key=config_key, value=config)
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                       client=root_client,
                                                       target_name='ovs-{0}'.format(service.name))
                    changed_clients.add(root_client)
            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True)
    except Exception:
        # NOTE(review): message says 'Arakoon' but this section handles ALBA proxy services — looks copy-pasted
        MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

    ############################################################
    # Additional string formatting in DTL/VOLDRV services (2.11)
    try:
        if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False:
            sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
            for vpool in VPoolList.get_vpools():
                for storagedriver in vpool.storagedrivers:
                    root_client = sr_client_map[storagedriver.storagerouter_guid]
                    for entry in ['dtl', 'volumedriver']:
                        service_name = '{0}_{1}'.format(entry, vpool.name)
                        service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['VOLDRV_PKG_NAME'] = sd_pkg_name
                            config['VOLDRV_VERSION_CMD'] = sd_version_cmd
                            Configuration.set(key=config_key, value=config)
                            service_manager.regenerate_service(name=service_template,
                                                               client=root_client,
                                                               target_name='ovs-{0}'.format(service_name))
                            changed_clients.add(root_client)
            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True)
    except Exception:
        # NOTE(review): message says 'Arakoon' but this section handles DTL/VOLDRV services — looks copy-pasted
        MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

    #######################################################
    # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876)
    if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False:
        try:
            voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
            for storagerouter in StorageRouterList.get_storagerouters():
                root_client = sr_client_map.get(storagerouter.guid)
                if root_client is None:
                    continue
                for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                    if not file_name.endswith('.version'):
                        continue
                    file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                    contents = root_client.file_read(filename=file_path)
                    regenerate = False
                    if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER:
                        if 'volumedriver-server' in contents:
                            regenerate = True
                            contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER)
                            root_client.file_write(filename=file_path, contents=contents)
                    elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE:
                        if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents:
                            regenerate = True
                            contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE)
                            contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE)
                            root_client.file_write(filename=file_path, contents=contents)
                    if regenerate is True:
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD,
                                                           client=root_client,
                                                           target_name='ovs-{0}'.format(file_name.split('.')[0]))  # Leave out .version
                        changed_clients.add(root_client)
            Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True)
        except Exception:
            MigrationController._logger.exception('Updating actual package name for version files failed')
    for root_client in changed_clients:
        try:
            root_client.run(['systemctl', 'daemon-reload'])
        except Exception:
            MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

    #########################################################
    # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue
                try:
                    service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    #########################################################
    # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue
                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    MigrationController._logger.info('Finished out of band migrations')
def gather_scrub_work():
    """
    Retrieve and execute scrub work.

    Collects every StorageRouter exposing a partition with the SCRUB role
    (skipping routers that are unreachable or whose 'workers' service is not
    running), gathers the guids of all vDisks of type 'BASE', splits those
    guids evenly across the scrub locations and executes the work: remote
    chunks through Celery tasks routed to the owning StorageRouter, the local
    chunk in-process. Work of failed remote nodes is rescheduled once to
    another healthy remote node, or executed locally as a fallback.

    :return: None
    :rtype: NoneType
    :raises RuntimeError: if no scrub locations are available, or if scrubbing
                          did not complete for all vDisks
    """
    ScheduledTaskController._logger.info('Gather Scrub - Started')

    # Build a map of StorageRouter -> scrub partition path, keeping only
    # routers that are reachable and have their ovs-workers running
    scrub_locations = {}
    for storage_driver in StorageDriverList.get_storagedrivers():
        for partition in storage_driver.partitions:
            if DiskPartition.ROLES.SCRUB == partition.role:
                ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                if storage_driver.storagerouter not in scrub_locations:
                    try:
                        sshclient = SSHClient(storage_driver.storagerouter)
                        # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                        if ServiceManager.get_service_status('workers', sshclient) is False:
                            ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip))
                        else:
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                    except UnableToConnectException:
                        ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

    if len(scrub_locations) == 0:
        raise RuntimeError('No scrub locations found')

    # Only vDisks whose object_type is 'BASE' are eligible for scrubbing
    vdisk_guids = set()
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] == 'BASE':
            vdisk_guids.add(vdisk.guid)

    if len(vdisk_guids) == 0:
        # Fixed: original called .format(len(vdisk_guids)) on a placeholder-less string (dead no-op call)
        ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
        return

    ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
    local_machineid = System.get_my_machine_id()
    local_storage_router = None
    local_scrub_location = None
    local_vdisks_to_scrub = []
    result_set = {}  # ip -> AsyncResult of the remote Celery task
    scrub_map = {}   # ip -> vDisk guids assigned to that remote node (for rescheduling on failure)

    # Divide the vDisk guids evenly over the available scrub locations.
    # Explicit floor division (identical result for ints in Python 2, but unambiguous).
    for index, scrub_info in enumerate(scrub_locations.items()):
        start_index = index * len(vdisk_guids) // len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) // len(scrub_locations)
        storage_router = scrub_info[0]
        vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
        local = storage_router.machine_id == local_machineid
        ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

        if local is True:
            # The local chunk is executed in-process after the remote tasks have been launched
            local_storage_router = storage_router
            local_scrub_location = scrub_info[1]
            local_vdisks_to_scrub = vdisk_guids_to_scrub
        else:
            result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                                          vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
            scrub_map[storage_router.ip] = vdisk_guids_to_scrub

    # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
    processed_guids = []
    if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
        try:
            processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                          vdisk_guids=local_vdisks_to_scrub)
        except Exception as ex:
            ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))

    all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                   timesleep=60)  # Check every 60 seconds if tasks are still running

    for ip, result in all_results.iteritems():
        if isinstance(result, list):
            processed_guids.extend(result)
        else:
            ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

    # Reschedule the work of each failed node once: preferably to another healthy
    # remote node, otherwise execute it locally.
    # NOTE(review): if multiple failed nodes reschedule onto the same healthy node,
    # the earlier result_set entry is overwritten and its result lost; the final
    # completeness check below still detects that as a failure -- confirm intent.
    result_set = {}
    for failed_node in failed_nodes:
        ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node))
        vdisk_guids_to_scrub = scrub_map[failed_node]
        rescheduled_work = False
        for storage_router, scrub_location in scrub_locations.items():
            if storage_router.ip not in failed_nodes and storage_router.machine_id != local_machineid:
                ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip))
                result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location,
                                                                                              vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                rescheduled_work = True
                break
        if rescheduled_work is False:
            if local_scrub_location is not None:
                try:
                    processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                                       vdisk_guids=vdisk_guids_to_scrub))
                except Exception as ex:
                    ScheduledTaskController._logger.error('Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex))
            else:
                ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node))

    # Wait for the rescheduled tasks (if any) as well
    if len(result_set) > 0:
        all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                        timesleep=60)  # Check every 60 seconds if tasks are still running
        for ip, result in all_results2.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

    # Every vDisk guid must have been reported as processed by exactly the gathered set
    if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
    ScheduledTaskController._logger.info('Gather Scrub - Finished')