def test_backoff_gap(self):
    """
    Validates the different trigger/backoff gap settings generated for various
    volume sizes (to be passed into the StorageDriver).
    """
    # Mapping of volume size -> expected gap configuration
    scenarios = {1 * 1024 ** 3: {'backoff': int(1 * 1024 ** 3 * 0.1),
                                 'trigger': int(1 * 1024 ** 3 * 0.08)},
                 2 * 1024 ** 4: {'backoff': int(500 * 1024 ** 3 * 0.1),  # Upper limits based on 500GiB volume
                                 'trigger': int(500 * 1024 ** 3 * 0.08)},
                 5: {'backoff': 2,
                     'trigger': 1},
                 None: {'backoff': 2 * 1024 ** 3,  # Invalid size, return default
                        'trigger': 1 * 1024 ** 3}}
    # Fix: .items() instead of the Python-2-only .iteritems() so the test also
    # runs under Python 3; behavior is identical for this read-only iteration.
    for size, gap_config in scenarios.items():
        self.assertDictEqual(StorageDriverController.generate_backoff_gap_settings(size), gap_config)
def mark_storagerouter_reachable_for_ha(cls, storagerouter):
    # type: (StorageRouter) -> None
    """
    Re-enable a StorageRouter for HA by removing its override entry from the
    node distance map.
    :param storagerouter: StorageRouter to put back into the distance map
    :type storagerouter: StorageRouter
    :return: None
    :rtype: NoneType
    """
    cls.logger.info("Marking Storagerouter {} as available for HA".format(storagerouter.name))
    update_key = os.path.join(VPOOL_UPDATE_KEY, storagerouter.guid)
    Configuration.delete(update_key)
    # Force the volumedriver cluster registry to rebuild all node distance maps
    StorageDriverController.cluster_registry_checkup()
    # Give the edge clients some time to pick up the refreshed configuration
    sync_time = cls.get_edge_sync_time()
    cls.logger.info("Waiting {} to sync up all edge clients".format(sync_time))
    time.sleep(sync_time)
def mark_storagerouter_unreachable_for_ha(cls, storagerouter):
    # type: (StorageRouter) -> None
    """
    Take a StorageRouter out of the HA pool by flagging it in the node distance maps.
    Current code paths that update the node distance map on the volumedriver side are:
    - Update of domains
    - Update of vpool layout (extend/shrink)
    - cluster registry checkup (ran periodically)
    :param storagerouter: StorageRouter to flag as unreachable for HA
    :type storagerouter: StorageRouter
    :return: None
    :rtype: NoneType
    """
    cls.logger.info("Marking Storagerouter {} as unavailable for HA".format(storagerouter.name))
    # Set the value used in the storagedriver cluster node config path.
    # This holds for all code paths mentioned in the docstring.
    update_key = os.path.join(VPOOL_UPDATE_KEY, storagerouter.guid)
    Configuration.set(update_key, 0)
    # Force the volumedriver cluster registry to rebuild all node distance maps
    StorageDriverController.cluster_registry_checkup()
    # Give the edge clients some time to pick up the refreshed configuration
    sync_time = cls.get_edge_sync_time()
    cls.logger.info("Waiting {} to sync up all edge clients".format(sync_time))
    time.sleep(sync_time)
def build_dal_structure(structure, previous_structure=None):
    # type: (dict, dict) -> dict
    """
    Builds a model structure
    Example:
        structure = DalHelper.build_service_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>)
        )
    :param structure: Specification of the objects to model (ids/tuples per object type)
    :param previous_structure: Output of an earlier call; already modeled objects are reused instead of recreated
    :return: Dict mapping object type name to {id: DAL object}
    """
    # Unit tests always run against the enterprise edition configuration
    Configuration.set(key=Configuration.EDITION_KEY, value=PackageFactory.EDITION_ENTERPRISE)
    if previous_structure is None:
        previous_structure = {}
    # Pick up whatever was modeled in a previous call so ids stay stable
    vdisks = previous_structure.get('vdisks', {})
    vpools = previous_structure.get('vpools', {})
    domains = previous_structure.get('domains', {})
    services = previous_structure.get('services', {})
    mds_services = previous_structure.get('mds_services', {})
    storagerouters = previous_structure.get('storagerouters', {})
    storagedrivers = previous_structure.get('storagedrivers', {})
    storagerouter_domains = previous_structure.get('storagerouter_domains', {})
    # Ensure every known service type exists in the model
    service_types = {}
    for service_type_name in ServiceType.SERVICE_TYPES.values():
        service_type = ServiceTypeList.get_by_name(service_type_name)
        if service_type is None:
            service_type = ServiceType()
            service_type.name = service_type_name
            service_type.save()
        service_types[service_type_name] = service_type
    srclients = {}  # StorageRouterClient per vpool id, used later to create volumes
    for domain_id in structure.get('domains', []):
        if domain_id not in domains:
            domain = Domain()
            domain.name = 'domain_{0}'.format(domain_id)
            domain.save()
            domains[domain_id] = domain
    for vpool_id in structure.get('vpools', []):
        if vpool_id not in vpools:
            vpool = VPool()
            vpool.name = str(vpool_id)
            vpool.status = 'RUNNING'
            vpool.metadata = {'backend': {}, 'caching_info': {}}
            vpool.metadata_store_bits = 5
            vpool.save()
            vpools[vpool_id] = vpool
        else:
            vpool = vpools[vpool_id]
        srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
        # Default MDS configuration for the vpool
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_tlogs'.format(vpool.guid), 100)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_safety'.format(vpool.guid), 2)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid), 75)
        Configuration.set('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid), json.dumps({}, indent=4), raw=True)
    for sr_id in structure.get('storagerouters', []):
        if sr_id not in storagerouters:
            storagerouter = StorageRouter()
            storagerouter.name = str(sr_id)
            storagerouter.ip = '10.0.0.{0}'.format(sr_id)
            storagerouter.rdma_capable = False
            storagerouter.node_type = 'MASTER'
            storagerouter.machine_id = str(sr_id)
            storagerouter.save()
            storagerouters[sr_id] = storagerouter
            # Model a single disk with one partition carrying both the DB and SCRUB roles
            disk = Disk()
            disk.storagerouter = storagerouter
            disk.state = 'OK'
            disk.name = '/dev/uda'
            disk.size = 1 * 1024 ** 4
            disk.is_ssd = True
            disk.aliases = ['/dev/uda']
            disk.save()
            partition = DiskPartition()
            partition.offset = 0
            partition.size = disk.size
            partition.aliases = ['/dev/uda-1']
            partition.state = 'OK'
            partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(sr_id)
            partition.disk = disk
            partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            partition.save()
        else:
            storagerouter = storagerouters[sr_id]
        # noinspection PyProtectedMember
        System._machine_id[storagerouter.ip] = str(sr_id)
        # Reserve non-overlapping port ranges per storagerouter
        mds_start = 10000 + 100 * (sr_id - 1)
        mds_end = 10000 + 100 * sr_id - 1
        arakoon_start = 20000 + 100 * (sr_id - 1)
        storagedriver_start = 30000 + 100 * (sr_id - 1)
        storagedriver_end = 30000 + 100 * sr_id - 1
        Configuration.initialize_host(host_id=sr_id,
                                      port_info={'mds': [mds_start, mds_end],
                                                 'arakoon': arakoon_start,
                                                 'storagedriver': [storagedriver_start, storagedriver_end]})
    for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
        if sd_id not in storagedrivers:
            storagedriver = StorageDriver()
            storagedriver.vpool = vpools[vpool_id]
            storagedriver.storagerouter = storagerouters[sr_id]
            storagedriver.name = str(sd_id)
            storagedriver.mountpoint = '/'
            storagedriver.cluster_ip = storagerouters[sr_id].ip
            storagedriver.storage_ip = '10.0.1.{0}'.format(sr_id)
            storagedriver.storagedriver_id = str(sd_id)
            storagedriver.ports = {'management': 1, 'xmlrpc': 2, 'dtl': 3, 'edge': 4}
            storagedriver.save()
            storagedrivers[sd_id] = storagedriver
            # NOTE(review): applied only for newly modeled storagedrivers — confirm against upstream helper
            DalHelper.set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=storagedriver)
    for mds_id, sd_id in structure.get('mds_services', ()):
        if mds_id not in mds_services:
            sd = storagedrivers[sd_id]
            s_id = '{0}-{1}'.format(sd.storagerouter.name, mds_id)
            service = Service()
            service.name = s_id
            service.storagerouter = sd.storagerouter
            service.ports = [mds_id]
            service.type = service_types['MetadataServer']
            service.save()
            services[s_id] = service
            mds_service = MDSService()
            mds_service.service = service
            mds_service.number = 0
            mds_service.capacity = 10
            mds_service.vpool = sd.vpool
            mds_service.save()
            mds_services[mds_id] = mds_service
            StorageDriverController.add_storagedriverpartition(sd, {'size': None,
                                                                   'role': DiskPartition.ROLES.DB,
                                                                   'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                   'partition': sd.storagerouter.disks[0].partitions[0],
                                                                   'mds_service': mds_service})
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get('vdisks', ()):
        if vdisk_id not in vdisks:
            vpool = vpools[vpool_id]
            devicename = 'vdisk_{0}'.format(vdisk_id)
            mds_backend_config = DalHelper.generate_mds_metadata_backend_config([] if mds_id is None else [mds_services[mds_id]])
            # Create the volume through the (mocked) storagerouter client first
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            vdisk = VDisk()
            vdisk.name = str(vdisk_id)
            vdisk.devicename = devicename
            vdisk.volume_id = volume_id
            vdisk.vpool = vpool
            vdisk.size = 0
            vdisk.save()
            vdisk.reload_client('storagedriver')
            vdisks[vdisk_id] = vdisk
    for srd_id, sr_id, domain_id, backup in structure.get('storagerouter_domains', ()):
        if srd_id not in storagerouter_domains:
            sr_domain = StorageRouterDomain()
            sr_domain.backup = backup
            sr_domain.domain = domains[domain_id]
            sr_domain.storagerouter = storagerouters[sr_id]
            sr_domain.save()
            storagerouter_domains[srd_id] = sr_domain
    return {'vdisks': vdisks,
            'vpools': vpools,
            'domains': domains,
            'services': services,
            'mds_services': mds_services,
            'service_types': service_types,
            'storagerouters': storagerouters,
            'storagedrivers': storagedrivers,
            'storagerouter_domains': storagerouter_domains}
def test_node_config_checkup(self):
    """
    Validates correct working of cluster registry checkup
    """
    # Expected cluster node config per vrouter id; node_distance_map is filled in per scenario
    base_structure = {'1': {'vrouter_id': '1',
                            'message_host': '10.0.1.1',
                            'message_port': 1,
                            'xmlrpc_host': '10.0.0.1',
                            'xmlrpc_port': 2,
                            'failovercache_host': '10.0.1.1',
                            'failovercache_port': 3,
                            'network_server_uri': 'tcp://10.0.1.1:4',
                            'node_distance_map': None},
                      '2': {'vrouter_id': '2',
                            'message_host': '10.0.1.2',
                            'message_port': 1,
                            'xmlrpc_host': '10.0.0.2',
                            'xmlrpc_port': 2,
                            'failovercache_host': '10.0.1.2',
                            'failovercache_port': 3,
                            'network_server_uri': 'tcp://10.0.1.2:4',
                            'node_distance_map': None}}

    def _validate_node_config(_config, _expected_map):
        # Compare one registry node config against base_structure plus the expected distance map
        expected = copy.deepcopy(base_structure[_config.vrouter_id])
        expected['node_distance_map'] = _expected_map[_config.vrouter_id]
        self.assertDictEqual(expected, {'vrouter_id': _config.vrouter_id,
                                        'message_host': _config.message_host,
                                        'message_port': _config.message_port,
                                        'xmlrpc_host': _config.xmlrpc_host,
                                        'xmlrpc_port': _config.xmlrpc_port,
                                        'failovercache_host': _config.failovercache_host,
                                        'failovercache_port': _config.failovercache_port,
                                        'network_server_uri': _config.network_server_uri,
                                        'node_distance_map': _config.node_distance_map})

    # Two storagerouters in the same domain, one storagedriver each for vpool 1
    structure = DalHelper.build_dal_structure(
        {'vpools': [1],
         'domains': [1, 2],
         'storagerouters': [1, 2],
         'storagedrivers': [(1, 1, 1), (2, 1, 2)],  # (<id>, <vpool_id>, <storagerouter_id>)
         'storagerouter_domains': [(1, 1, 1, False), (2, 2, 1, False)]}  # (id>, <storagerouter_id>, <domain_id>, <backup>)
    )
    storagerouters = structure['storagerouters']
    vpool = structure['vpools'][1]
    arakoon_installer = ArakoonInstaller(cluster_name='voldrv')
    arakoon_installer.create_cluster(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                                     ip=storagerouters[1].ip,
                                     base_dir='/tmp')
    # Initial run, it will now be configured
    StorageRouterClient.node_config_recordings = []
    result = StorageDriverController.cluster_registry_checkup()
    self.assertDictEqual(result, {vpool.guid: {'success': True, 'changes': True}})
    # Both nodes received a config update
    self.assertListEqual(sorted(StorageRouterClient.node_config_recordings), ['1', '2'])
    expected_map = {'1': {'2': StorageDriver.DISTANCES.NEAR},
                    '2': {'1': StorageDriver.DISTANCES.NEAR}}
    configs = vpool.clusterregistry_client.get_node_configs()
    for config in configs:
        _validate_node_config(config, expected_map)
    # Running it again should not change anything
    StorageRouterClient.node_config_recordings = []
    result = StorageDriverController.cluster_registry_checkup()
    self.assertDictEqual(result, {vpool.guid: {'success': True, 'changes': False}})
    # No node received a config update this time
    self.assertListEqual(sorted(StorageRouterClient.node_config_recordings), [])
    expected_map = {'1': {'2': StorageDriver.DISTANCES.NEAR},
                    '2': {'1': StorageDriver.DISTANCES.NEAR}}
    configs = vpool.clusterregistry_client.get_node_configs()
    for config in configs:
        _validate_node_config(config, expected_map)
    # Validate some error paths
    # Move storagerouter 1 to another domain and make its config unreachable
    domain = structure['domains'][2]
    junction = structure['storagerouters'][1].domains[0]
    junction.domain = domain
    junction.save()
    vpool_config_path = 'file://opt/OpenvStorage/config/framework.json?key=/ovs/vpools/{0}/hosts/1/config'.format(vpool.guid)
    StorageRouterClient.exceptions['server_revision'] = {vpool_config_path: Exception('ClusterNotReachableException')}
    StorageRouterClient.node_config_recordings = []
    result = StorageDriverController.cluster_registry_checkup()
    self.assertDictEqual(result, {vpool.guid: {'success': True, 'changes': True}})
    # Only node 2 could be updated; node 1 was unreachable
    self.assertListEqual(sorted(StorageRouterClient.node_config_recordings), ['2'])
    expected_map = {'1': {'2': StorageDriver.DISTANCES.INFINITE},
                    '2': {'1': StorageDriver.DISTANCES.INFINITE}}
    configs = vpool.clusterregistry_client.get_node_configs()
    for config in configs:
        _validate_node_config(config, expected_map)
def build_service_structure(structure, previous_structure=None):
    """
    Builds an MDS service structure
    Example:
        structure = Helper.build_service_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>)
        )
    :param structure: Specification of the objects to model (ids/tuples per object type)
    :param previous_structure: Output of an earlier call; already modeled objects are reused instead of recreated
    :return: Dict mapping object type name to {id: DAL object}
    """
    if previous_structure is None:
        previous_structure = {}
    # Pick up whatever was modeled in a previous call so ids stay stable
    vdisks = previous_structure.get("vdisks", {})
    vpools = previous_structure.get("vpools", {})
    domains = previous_structure.get("domains", {})
    services = previous_structure.get("services", {})
    mds_services = previous_structure.get("mds_services", {})
    storagerouters = previous_structure.get("storagerouters", {})
    storagedrivers = previous_structure.get("storagedrivers", {})
    storagerouter_domains = previous_structure.get("storagerouter_domains", {})
    # Ensure the MetadataServer service type exists in the model
    service_type = ServiceTypeList.get_by_name("MetadataServer")
    if service_type is None:
        service_type = ServiceType()
        service_type.name = "MetadataServer"
        service_type.save()
    srclients = {}  # StorageRouterClient per vpool id, used later to create volumes
    for domain_id in structure.get("domains", []):
        if domain_id not in domains:
            domain = Domain()
            domain.name = "domain_{0}".format(domain_id)
            domain.save()
            domains[domain_id] = domain
    for vpool_id in structure.get("vpools", []):
        if vpool_id not in vpools:
            vpool = VPool()
            vpool.name = str(vpool_id)
            vpool.status = "RUNNING"
            vpool.save()
            vpools[vpool_id] = vpool
        else:
            vpool = vpools[vpool_id]
        srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
    for sr_id in structure.get("storagerouters", []):
        if sr_id not in storagerouters:
            storagerouter = StorageRouter()
            storagerouter.name = str(sr_id)
            storagerouter.ip = "10.0.0.{0}".format(sr_id)
            storagerouter.rdma_capable = False
            storagerouter.node_type = "MASTER"
            storagerouter.machine_id = str(sr_id)
            storagerouter.save()
            storagerouters[sr_id] = storagerouter
            # Model a single disk with one partition carrying both the DB and SCRUB roles
            disk = Disk()
            disk.storagerouter = storagerouter
            disk.state = "OK"
            disk.name = "/dev/uda"
            disk.size = 1 * 1024 ** 4
            disk.is_ssd = True
            disk.aliases = ["/dev/uda"]
            disk.save()
            partition = DiskPartition()
            partition.offset = 0
            partition.size = disk.size
            partition.aliases = ["/dev/uda-1"]
            partition.state = "OK"
            partition.mountpoint = "/tmp/unittest/sr_{0}/disk_1/partition_1".format(sr_id)
            partition.disk = disk
            partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            partition.save()
    for sd_id, vpool_id, sr_id in structure.get("storagedrivers", ()):
        if sd_id not in storagedrivers:
            storagedriver = StorageDriver()
            storagedriver.vpool = vpools[vpool_id]
            storagedriver.storagerouter = storagerouters[sr_id]
            storagedriver.name = str(sd_id)
            storagedriver.mountpoint = "/"
            storagedriver.cluster_ip = storagerouters[sr_id].ip
            storagedriver.storage_ip = "10.0.1.{0}".format(sr_id)
            storagedriver.storagedriver_id = str(sd_id)
            storagedriver.ports = {"management": 1, "xmlrpc": 2, "dtl": 3, "edge": 4}
            storagedriver.save()
            storagedrivers[sd_id] = storagedriver
            # NOTE(review): applied only for newly modeled storagedrivers — confirm against upstream helper
            Helper._set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=storagedriver)
    for mds_id, sd_id in structure.get("mds_services", ()):
        if mds_id not in mds_services:
            sd = storagedrivers[sd_id]
            s_id = "{0}-{1}".format(sd.storagerouter.name, mds_id)
            service = Service()
            service.name = s_id
            service.storagerouter = sd.storagerouter
            service.ports = [mds_id]
            service.type = service_type
            service.save()
            services[s_id] = service
            mds_service = MDSService()
            mds_service.service = service
            mds_service.number = 0
            mds_service.capacity = 10
            mds_service.vpool = sd.vpool
            mds_service.save()
            mds_services[mds_id] = mds_service
            StorageDriverController.add_storagedriverpartition(
                sd,
                {
                    "size": None,
                    "role": DiskPartition.ROLES.DB,
                    "sub_role": StorageDriverPartition.SUBROLE.MDS,
                    "partition": sd.storagerouter.disks[0].partitions[0],
                    "mds_service": mds_service,
                },
            )
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get("vdisks", ()):
        if vdisk_id not in vdisks:
            vpool = vpools[vpool_id]
            devicename = "vdisk_{0}".format(vdisk_id)
            mds_backend_config = Helper._generate_mdsmetadatabackendconfig(
                [] if mds_id is None else [mds_services[mds_id]]
            )
            # Create the volume through the (mocked) storagerouter client first
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            vdisk = VDisk()
            vdisk.name = str(vdisk_id)
            vdisk.devicename = devicename
            vdisk.volume_id = volume_id
            vdisk.vpool = vpool
            vdisk.size = 0
            vdisk.save()
            vdisk.reload_client("storagedriver")
            vdisks[vdisk_id] = vdisk
    for srd_id, sr_id, domain_id, backup in structure.get("storagerouter_domains", ()):
        if srd_id not in storagerouter_domains:
            sr_domain = StorageRouterDomain()
            sr_domain.backup = backup
            sr_domain.domain = domains[domain_id]
            sr_domain.storagerouter = storagerouters[sr_id]
            sr_domain.save()
            storagerouter_domains[srd_id] = sr_domain
    return {
        "vdisks": vdisks,
        "vpools": vpools,
        "domains": domains,
        "services": services,
        "service_type": service_type,
        "mds_services": mds_services,
        "storagerouters": storagerouters,
        "storagedrivers": storagedrivers,
        "storagerouter_domains": storagerouter_domains,
    }
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files
    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.
    :param storagerouter: Storagerouter on which MDS service will be created
    :type storagerouter: StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: VPool
    :param fresh_only: If True and no current mds services exist for this vpool on this storagerouter, a new 1 will be created
    :type fresh_only: bool
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :type reload_config: bool
    :return: Newly created service
    :rtype: MDSService
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)
    if fresh_only is True and service_number >= 0:
        return  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(service.ports)
    mds_port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
    free_ports = System.get_free_ports(selected_range=mds_port_range,
                                       exclude=occupied_ports,
                                       nr=1,
                                       client=client)
    if not free_ports:
        raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

    # 2. Partition check
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))
    storagedriver = storagedrivers[0]

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions
    # Local import to avoid a circular dependency at module load time
    from ovs.lib.storagedriver import StorageDriverController
    StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                      'role': DiskPartition.ROLES.DB,
                                                                      'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                      'partition': db_partition,
                                                                      'mds_service': mds_service})

    # CONFIGURATIONS
    # 1. Volumedriver
    # Collect every MDS node for this vpool on this storagerouter (including the one just created)
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service is not None:
                if mds_service.vpool_guid == vpool.guid:
                    sdp = [sd_partition for sd_partition in mds_service.storagedriver_partitions
                           if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]
                    mds_nodes.append({'host': service.storagerouter.ip,
                                      'port': service.ports[0],
                                      'db_directory': sdp.path,
                                      'scratch_directory': sdp.path})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
    storagedriver_config.load()
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)
    return mds_service
def voldrv_arakoon_checkup():
    """
    Run the scheduled voldrv Arakoon checkup task.
    :return: None
    """
    # No API available for this task; invoke the controller directly
    StorageDriverController.scheduled_voldrv_arakoon_checkup()
def build_service_structure(structure, previous_structure=None):
    """
    Builds an MDS service structure
    Example:
        structure = Helper.build_service_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>)
        )
    :param structure: Specification of the objects to model (ids/tuples per object type)
    :param previous_structure: Output of an earlier call; already modeled objects are reused instead of recreated
    :return: Dict mapping object type name to {id: DAL object}
    """
    if previous_structure is None:
        previous_structure = {}
    # Pick up whatever was modeled in a previous call so ids stay stable
    vdisks = previous_structure.get('vdisks', {})
    vpools = previous_structure.get('vpools', {})
    domains = previous_structure.get('domains', {})
    services = previous_structure.get('services', {})
    mds_services = previous_structure.get('mds_services', {})
    storagerouters = previous_structure.get('storagerouters', {})
    storagedrivers = previous_structure.get('storagedrivers', {})
    storagerouter_domains = previous_structure.get('storagerouter_domains', {})
    # Ensure the MetadataServer service type exists in the model
    service_type = ServiceTypeList.get_by_name('MetadataServer')
    if service_type is None:
        service_type = ServiceType()
        service_type.name = 'MetadataServer'
        service_type.save()
    srclients = {}  # StorageRouterClient per vpool id, used later to create volumes
    for domain_id in structure.get('domains', []):
        if domain_id not in domains:
            domain = Domain()
            domain.name = 'domain_{0}'.format(domain_id)
            domain.save()
            domains[domain_id] = domain
    for vpool_id in structure.get('vpools', []):
        if vpool_id not in vpools:
            vpool = VPool()
            vpool.name = str(vpool_id)
            vpool.status = 'RUNNING'
            vpool.save()
            vpools[vpool_id] = vpool
        else:
            vpool = vpools[vpool_id]
        srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
    for sr_id in structure.get('storagerouters', []):
        if sr_id not in storagerouters:
            storagerouter = StorageRouter()
            storagerouter.name = str(sr_id)
            storagerouter.ip = '10.0.0.{0}'.format(sr_id)
            storagerouter.rdma_capable = False
            storagerouter.node_type = 'MASTER'
            storagerouter.machine_id = str(sr_id)
            storagerouter.save()
            storagerouters[sr_id] = storagerouter
            # Model a single disk with one partition carrying both the DB and SCRUB roles
            disk = Disk()
            disk.storagerouter = storagerouter
            disk.state = 'OK'
            disk.name = '/dev/uda'
            disk.size = 1 * 1024 ** 4
            disk.is_ssd = True
            disk.aliases = ['/dev/uda']
            disk.save()
            partition = DiskPartition()
            partition.offset = 0
            partition.size = disk.size
            partition.aliases = ['/dev/uda-1']
            partition.state = 'OK'
            partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(sr_id)
            partition.disk = disk
            partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            partition.save()
    for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
        if sd_id not in storagedrivers:
            storagedriver = StorageDriver()
            storagedriver.vpool = vpools[vpool_id]
            storagedriver.storagerouter = storagerouters[sr_id]
            storagedriver.name = str(sd_id)
            storagedriver.mountpoint = '/'
            storagedriver.cluster_ip = storagerouters[sr_id].ip
            storagedriver.storage_ip = '10.0.1.{0}'.format(sr_id)
            storagedriver.storagedriver_id = str(sd_id)
            storagedriver.ports = {'management': 1, 'xmlrpc': 2, 'dtl': 3, 'edge': 4}
            storagedriver.save()
            storagedrivers[sd_id] = storagedriver
            # NOTE(review): applied only for newly modeled storagedrivers — confirm against upstream helper
            Helper._set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=storagedriver)
    for mds_id, sd_id in structure.get('mds_services', ()):
        if mds_id not in mds_services:
            sd = storagedrivers[sd_id]
            s_id = '{0}-{1}'.format(sd.storagerouter.name, mds_id)
            service = Service()
            service.name = s_id
            service.storagerouter = sd.storagerouter
            service.ports = [mds_id]
            service.type = service_type
            service.save()
            services[s_id] = service
            mds_service = MDSService()
            mds_service.service = service
            mds_service.number = 0
            mds_service.capacity = 10
            mds_service.vpool = sd.vpool
            mds_service.save()
            mds_services[mds_id] = mds_service
            StorageDriverController.add_storagedriverpartition(sd, {'size': None,
                                                                   'role': DiskPartition.ROLES.DB,
                                                                   'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                   'partition': sd.storagerouter.disks[0].partitions[0],
                                                                   'mds_service': mds_service})
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get('vdisks', ()):
        if vdisk_id not in vdisks:
            vpool = vpools[vpool_id]
            devicename = 'vdisk_{0}'.format(vdisk_id)
            mds_backend_config = Helper._generate_mdsmetadatabackendconfig([] if mds_id is None else [mds_services[mds_id]])
            # Create the volume through the (mocked) storagerouter client first
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            vdisk = VDisk()
            vdisk.name = str(vdisk_id)
            vdisk.devicename = devicename
            vdisk.volume_id = volume_id
            vdisk.vpool = vpool
            vdisk.size = 0
            vdisk.save()
            vdisk.reload_client('storagedriver')
            vdisks[vdisk_id] = vdisk
    for srd_id, sr_id, domain_id, backup in structure.get('storagerouter_domains', ()):
        if srd_id not in storagerouter_domains:
            sr_domain = StorageRouterDomain()
            sr_domain.backup = backup
            sr_domain.domain = domains[domain_id]
            sr_domain.storagerouter = storagerouters[sr_id]
            sr_domain.save()
            storagerouter_domains[srd_id] = sr_domain
    return {'vdisks': vdisks,
            'vpools': vpools,
            'domains': domains,
            'services': services,
            'service_type': service_type,
            'mds_services': mds_services,
            'storagerouters': storagerouters,
            'storagedrivers': storagedrivers,
            'storagerouter_domains': storagerouter_domains}
def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, reload_config=False):
    """
    Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files
    Assumes the StorageRouter and VPool are already configured with a StorageDriver and
    that all model-wise configuration regarding both is completed.
    :param client: SSHClient to the StorageRouter on which the service is prepared
    :param storagerouter: StorageRouter on which the MDS service will be created
    :param vpool: The vPool for which the MDS service will be created
    :param fresh_only: If True, abort when one or more MDS services already run for this vpool on this storagerouter
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :return: Newly created MDSService, or None when aborted
    """
    # Local import to avoid a circular dependency at module load time
    from ovs.lib.storagedriver import StorageDriverController
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]
    # Fetch service sequence number
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)
    if fresh_only is True and service_number >= 0:
        return None  # There are already one or more MDS services running, aborting
    service_number += 1
    # Find free port
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.append(service.ports[0])
    port = System.get_free_ports(Configuration.get('ovs.ports.mds'),
                                 exclude=occupied_ports,
                                 nr=1,
                                 client=client)[0]
    # Add service to the model
    service = DalService()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.storagerouter = storagerouter
    service.ports = [port]
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.save()
    # Locate the DB and SCRUB partitions backing this MDS service
    scrub_partition = None
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
            if DiskPartition.ROLES.SCRUB in partition.roles:
                scrub_partition = partition
    if scrub_partition is None or db_partition is None:
        raise RuntimeError('Could not find DB or SCRUB partition on StorageRouter {0}'.format(storagerouter.name))
    StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                      'role': DiskPartition.ROLES.DB,
                                                                      'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                      'partition': db_partition,
                                                                      'mds_service': mds_service})
    StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                      'role': DiskPartition.ROLES.SCRUB,
                                                                      'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                      'partition': scrub_partition,
                                                                      'mds_service': mds_service})
    # Collect every MDS node for this vpool on this storagerouter (including the one just created)
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service.vpool_guid == vpool.guid:
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': [sd_partition.path for sd_partition in mds_service.storagedriver_partitions
                                                   if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0],
                                  'scratch_directory': [sd_partition.path for sd_partition in mds_service.storagedriver_partitions
                                                        if sd_partition.role == DiskPartition.ROLES.SCRUB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]})
    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)
    return mds_service
def configure_storagedriver_service(self):
    """
    Configure the StorageDriver service.
    Assembles the complete StorageDriver configuration (SCO cache, file driver, volume
    router, volume manager, event publisher, registries, DTL, filesystem and backend
    connection manager sections) and saves it via the StorageRouter installer's root client.
    :raises RuntimeError: When no StorageRouterInstaller instance is set or the
                          StorageDriverPartition junctions have not been created yet
    :return: None
    :rtype: NoneType
    """
    def _generate_queue_urls():
        # One AMQP URI per master StorageRouter, built from the framework message queue settings
        mq_user = Configuration.get('/ovs/framework/messagequeue|user')
        mq_protocol = Configuration.get('/ovs/framework/messagequeue|protocol')
        mq_password = Configuration.get('/ovs/framework/messagequeue|password')
        return [{'amqp_uri': '{0}://{1}:{2}@{3}:5672'.format(mq_protocol, mq_user, mq_password, sr.ip)} for sr in StorageRouterList.get_masters()]

    def _generate_config_file_system():
        # Filesystem section; DTL settings depend on the requested framework DTL mode
        config = {'fs_dtl_host': '',
                  'fs_enable_shm_interface': 0,
                  'fs_enable_network_interface': 1,
                  'fs_metadata_backend_arakoon_cluster_nodes': [],
                  'fs_metadata_backend_mds_nodes': [],
                  'fs_metadata_backend_type': 'MDS',
                  'fs_virtual_disk_format': 'raw',
                  'fs_raw_disk_suffix': '.raw',
                  'fs_file_event_rules': [{'fs_file_event_rule_calls': ['Rename'],
                                           'fs_file_event_rule_path_regex': '.*'}]}
        if self.dtl_mode == StorageDriverClient.FRAMEWORK_DTL_NO_SYNC:
            # No synchronisation requested: leave DTL in manual mode (effectively disabled)
            config['fs_dtl_config_mode'] = StorageDriverClient.VOLDRV_DTL_MANUAL_MODE
        else:
            config['fs_dtl_mode'] = StorageDriverClient.VPOOL_DTL_MODE_MAP[self.dtl_mode]
            config['fs_dtl_config_mode'] = StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE
        return config

    def _generate_config_backend_connection_manager():
        # One numbered sub-section per ALBA proxy, ordered by the proxy's service port
        config = {'backend_type': 'MULTI',
                  'backend_interface_retries_on_error': 5,
                  'backend_interface_retry_interval_secs': 1,
                  'backend_interface_retry_backoff_multiplier': 2.0}
        for index, proxy in enumerate(sorted(self.storagedriver.alba_proxies, key=lambda k: k.service.ports[0])):
            config[str(index)] = {'alba_connection_host': self.storagedriver.storage_ip,
                                  'alba_connection_port': proxy.service.ports[0],
                                  'alba_connection_preset': vpool.metadata['backend']['backend_info']['preset'],
                                  'alba_connection_timeout': 30,
                                  'alba_connection_use_rora': True,
                                  'alba_connection_transport': 'TCP',
                                  'alba_connection_rora_manifest_cache_capacity': 25000,
                                  'alba_connection_asd_connection_pool_capacity': 10,
                                  'alba_connection_rora_timeout_msecs': 50,
                                  'backend_type': 'ALBA'}
        return config

    # Preconditions: installer must exist and write cache junctions must already be modelled
    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')
    if len(self.write_caches) == 0:
        raise RuntimeError('The StorageDriverPartition junctions have not been created yet')

    vpool = self.vp_installer.vpool
    # Trigger/backoff gaps are derived from the smallest write partition size
    gap_configuration = StorageDriverController.calculate_trigger_and_backoff_gap(cache_size=self.sr_installer.smallest_write_partition_size)
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
    arakoon_nodes = [{'host': node.ip,
                      'port': node.client_port,
                      'node_id': node.name} for node in ArakoonClusterConfig(cluster_id=arakoon_cluster_name).nodes]

    storagedriver_config = StorageDriverConfiguration(vpool.guid, self.storagedriver.storagedriver_id)
    storagedriver_config.configure_scocache(scocache_mount_points=self.write_caches,
                                            trigger_gap=ExtensionsToolbox.convert_byte_size_to_human_readable(size=gap_configuration['trigger']),
                                            backoff_gap=ExtensionsToolbox.convert_byte_size_to_human_readable(size=gap_configuration['backoff']))
    storagedriver_config.configure_file_driver(fd_cache_path=self.storagedriver_partition_file_driver.path,
                                               fd_extent_cache_capacity='1024',
                                               fd_namespace='fd-{0}-{1}'.format(vpool.name, vpool.guid))
    storagedriver_config.configure_volume_router(vrouter_id=self.storagedriver.storagedriver_id,
                                                 vrouter_redirect_timeout_ms='120000',
                                                 vrouter_keepalive_time_secs='15',
                                                 vrouter_keepalive_interval_secs='5',
                                                 vrouter_keepalive_retries='2',
                                                 vrouter_routing_retries=10,
                                                 vrouter_volume_read_threshold=0,
                                                 vrouter_volume_write_threshold=0,
                                                 vrouter_file_read_threshold=0,
                                                 vrouter_file_write_threshold=0,
                                                 vrouter_min_workers=4,
                                                 vrouter_max_workers=16,
                                                 # SCO size is configured in MiB, cluster size in KiB
                                                 vrouter_sco_multiplier=self.sco_size * 1024 / self.cluster_size,
                                                 vrouter_backend_sync_timeout_ms=60000,
                                                 vrouter_migrate_timeout_ms=60000,
                                                 vrouter_use_fencing=True)
    storagedriver_config.configure_volume_manager(tlog_path=self.storagedriver_partition_tlogs.path,
                                                  metadata_path=self.storagedriver_partition_metadata.path,
                                                  clean_interval=1,
                                                  dtl_throttle_usecs=4000,
                                                  default_cluster_size=self.cluster_size * 1024,
                                                  number_of_scos_in_tlog=self.tlog_multiplier,
                                                  non_disposable_scos_factor=float(self.write_buffer) / self.tlog_multiplier / self.sco_size)
    storagedriver_config.configure_event_publisher(events_amqp_routing_key=Configuration.get('/ovs/framework/messagequeue|queues.storagedriver'),
                                                   events_amqp_uris=_generate_queue_urls())
    storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id=arakoon_cluster_name,
                                                   vregistry_arakoon_cluster_nodes=arakoon_nodes)
    storagedriver_config.configure_network_interface(network_max_neighbour_distance=StorageDriver.DISTANCES.FAR - 1)
    storagedriver_config.configure_threadpool_component(num_threads=16)
    storagedriver_config.configure_volume_router_cluster(vrouter_cluster_id=vpool.guid)
    storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                          dls_arakoon_cluster_id=arakoon_cluster_name,
                                                          dls_arakoon_cluster_nodes=arakoon_nodes)
    storagedriver_config.configure_content_addressed_cache(serialize_read_cache=False,
                                                           read_cache_serialization_path=[])
    storagedriver_config.configure_distributed_transaction_log(dtl_path=self.storagedriver_partition_dtl.path,  # Not used, but required
                                                               dtl_transport=StorageDriverClient.VPOOL_DTL_TRANSPORT_MAP[self.dtl_transport])
    storagedriver_config.configure_filesystem(**_generate_config_file_system())
    storagedriver_config.configure_backend_connection_manager(**_generate_config_backend_connection_manager())
    storagedriver_config.save(client=self.sr_installer.root_client)
def create_partitions(self):
    """
    Configure all partitions for a StorageDriver (junctions between a StorageDriver and a DiskPartition).
    Creates the WRITE (fragment cache / SCO), FD, DB (tlogs / metadata) and DTL junctions in the model,
    collects the directories to create and finally creates them on the StorageRouter.
    :raises RuntimeError: When no StorageDriver or StorageRouterInstaller instance is set
    :raises ValueError: When calculating the cache sizes went wrong
    :return: None
    :rtype: NoneType
    """
    if self.storagedriver is None:
        raise RuntimeError('A StorageDriver needs to be created first')
    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')

    # Assign WRITE / Fragment cache
    # Every write partition gets a share of the requested global write buffer, proportional
    # to its available size relative to the total available size.
    for writecache_info in self.sr_installer.write_partitions:
        available = writecache_info['available']
        partition = DiskPartition(writecache_info['guid'])
        proportion = available * 100.0 / self.sr_installer.global_write_buffer_available_size
        size_to_be_used = proportion * self.sr_installer.global_write_buffer_requested_size / 100
        write_cache_percentage = 0.98
        if self.sr_installer.requested_local_proxies > 0 and partition == self.sr_installer.largest_write_partition:
            # At least 1 local proxy has been requested either for fragment or block cache:
            # reserve 10% of this (largest) partition for the local cache and shrink the write cache share
            self.cache_size_local = int(size_to_be_used * 0.10)  # Bytes
            write_cache_percentage = 0.88
            for _ in xrange(self.sr_installer.requested_proxies):
                storagedriver_partition_cache = StorageDriverController.add_storagedriverpartition(storagedriver=self.storagedriver,
                                                                                                   partition_info={'size': None,
                                                                                                                   'role': DiskPartition.ROLES.WRITE,
                                                                                                                   'sub_role': StorageDriverPartition.SUBROLE.FCACHE,
                                                                                                                   'partition': partition})
                self.sr_installer.created_dirs.append(storagedriver_partition_cache.path)
                if self.block_cache_local is True:
                    self.sr_installer.created_dirs.append('{0}/bc'.format(storagedriver_partition_cache.path))
                if self.fragment_cache_local is True:
                    self.sr_installer.created_dirs.append('{0}/fc'.format(storagedriver_partition_cache.path))
                self.storagedriver_partitions_caches.append(storagedriver_partition_cache)

        # Round the write cache size down to a multiple of 4096 KiB (w_size is expressed in KiB)
        w_size = int(size_to_be_used * write_cache_percentage / 1024 / 4096) * 4096
        storagedriver_partition_write = StorageDriverController.add_storagedriverpartition(storagedriver=self.storagedriver,
                                                                                           partition_info={'size': long(size_to_be_used),
                                                                                                           'role': DiskPartition.ROLES.WRITE,
                                                                                                           'sub_role': StorageDriverPartition.SUBROLE.SCO,
                                                                                                           'partition': partition})
        self.write_caches.append({'path': storagedriver_partition_write.path,
                                  'size': '{0}KiB'.format(w_size)})
        self.sr_installer.created_dirs.append(storagedriver_partition_write.path)
        # Track the smallest write partition (in bytes); used later for trigger/backoff gap calculation
        if self.sr_installer.smallest_write_partition_size in [0, None] or (w_size * 1024) < self.sr_installer.smallest_write_partition_size:
            self.sr_installer.smallest_write_partition_size = w_size * 1024

    # Verify cache size: when a local cache was requested, cache_size_local must have been computed above
    if self.cache_size_local is None and (self.block_cache_local is True or self.fragment_cache_local is True):
        raise ValueError('Something went wrong trying to calculate the cache sizes')

    # Assign FD partition (file driver cache lives on the largest write partition)
    self.storagedriver_partition_file_driver = StorageDriverController.add_storagedriverpartition(storagedriver=self.storagedriver,
                                                                                                  partition_info={'size': None,
                                                                                                                  'role': DiskPartition.ROLES.WRITE,
                                                                                                                  'sub_role': StorageDriverPartition.SUBROLE.FD,
                                                                                                                  'partition': self.sr_installer.largest_write_partition})
    self.sr_installer.created_dirs.append(self.storagedriver_partition_file_driver.path)

    # Assign DB partition (tlogs and metadata both go on the first partition with a DB role)
    db_info = self.sr_installer.partition_info[DiskPartition.ROLES.DB][0]
    self.storagedriver_partition_tlogs = StorageDriverController.add_storagedriverpartition(storagedriver=self.storagedriver,
                                                                                            partition_info={'size': None,
                                                                                                            'role': DiskPartition.ROLES.DB,
                                                                                                            'sub_role': StorageDriverPartition.SUBROLE.TLOG,
                                                                                                            'partition': DiskPartition(db_info['guid'])})
    self.storagedriver_partition_metadata = StorageDriverController.add_storagedriverpartition(storagedriver=self.storagedriver,
                                                                                               partition_info={'size': None,
                                                                                                               'role': DiskPartition.ROLES.DB,
                                                                                                               'sub_role': StorageDriverPartition.SUBROLE.MD,
                                                                                                               'partition': DiskPartition(db_info['guid'])})
    self.sr_installer.created_dirs.append(self.storagedriver_partition_tlogs.path)
    self.sr_installer.created_dirs.append(self.storagedriver_partition_metadata.path)

    # Assign DTL
    dtl_info = self.sr_installer.partition_info[DiskPartition.ROLES.DTL][0]
    self.storagedriver_partition_dtl = StorageDriverController.add_storagedriverpartition(storagedriver=self.storagedriver,
                                                                                          partition_info={'size': None,
                                                                                                          'role': DiskPartition.ROLES.DTL,
                                                                                                          'partition': DiskPartition(dtl_info['guid'])})
    self.sr_installer.created_dirs.append(self.storagedriver_partition_dtl.path)
    self.sr_installer.created_dirs.append(self.storagedriver.mountpoint)

    # Create the directories
    self.sr_installer.root_client.dir_create(directories=self.sr_installer.created_dirs)
def test_node_config_checkup(self):
    """
    Validates correct working of cluster registry checkup:
    * Initial run configures the node distance maps
    * A second run is a no-op
    * A node that raises on 'server_revision' is marked INFINITE in all distance maps
    """
    # Expected cluster registry node configs (distance map filled in per scenario)
    base_structure = {'1': {'vrouter_id': '1',
                            'message_host': '10.0.1.1',
                            'message_port': 1,
                            'xmlrpc_host': '10.0.0.1',
                            'xmlrpc_port': 2,
                            'failovercache_host': '10.0.1.1',
                            'failovercache_port': 3,
                            'network_server_uri': 'tcp://10.0.1.1:4',
                            'node_distance_map': None},
                      '2': {'vrouter_id': '2',
                            'message_host': '10.0.1.2',
                            'message_port': 1,
                            'xmlrpc_host': '10.0.0.2',
                            'xmlrpc_port': 2,
                            'failovercache_host': '10.0.1.2',
                            'failovercache_port': 3,
                            'network_server_uri': 'tcp://10.0.1.2:4',
                            'node_distance_map': None}}

    def _validate_node_config(_config, _expected_map):
        # Compare one registry node config against the base structure + expected distance map
        expected = copy.deepcopy(base_structure[_config.vrouter_id])
        expected['node_distance_map'] = _expected_map[_config.vrouter_id]
        self.assertDictEqual(expected, {'vrouter_id': _config.vrouter_id,
                                        'message_host': _config.message_host,
                                        'message_port': _config.message_port,
                                        'xmlrpc_host': _config.xmlrpc_host,
                                        'xmlrpc_port': _config.xmlrpc_port,
                                        'failovercache_host': _config.failovercache_host,
                                        'failovercache_port': _config.failovercache_port,
                                        'network_server_uri': _config.network_server_uri,
                                        'node_distance_map': _config.node_distance_map})

    # Build a model with 1 vPool spanning 2 StorageRouters that share domain 1
    structure = Helper.build_service_structure(
        {'vpools': [1],
         'domains': [1, 2],
         'storagerouters': [1, 2],
         'storagedrivers': [(1, 1, 1), (2, 1, 2)],  # (<id>, <vpool_id>, <storagerouter_id>)
         'storagerouter_domains': [(1, 1, 1, False), (2, 2, 1, False)]}  # (id>, <storagerouter_id>, <domain_id>, <backup>)
    )
    storagerouters = structure['storagerouters']
    vpool = structure['vpools'][1]
    System._machine_id = {storagerouters[1].ip: '1',
                          storagerouters[2].ip: '2'}
    ArakoonInstaller.create_cluster('voldrv', ServiceType.ARAKOON_CLUSTER_TYPES.SD, storagerouters[1].ip, '/tmp')

    # Initial run, it will now be configured
    StorageRouterClient.node_config_recordings = []
    result = StorageDriverController.cluster_registry_checkup()
    self.assertDictEqual(result, {vpool.guid: {'success': True, 'changes': True}})
    self.assertListEqual(sorted(StorageRouterClient.node_config_recordings), ['1', '2'])
    # Both nodes share a domain, so they see each other as NEAR
    expected_map = {'1': {'2': StorageDriver.DISTANCES.NEAR},
                    '2': {'1': StorageDriver.DISTANCES.NEAR}}
    configs = vpool.clusterregistry_client.get_node_configs()
    for config in configs:
        _validate_node_config(config, expected_map)

    # Running it again should not change anything
    StorageRouterClient.node_config_recordings = []
    result = StorageDriverController.cluster_registry_checkup()
    self.assertDictEqual(result, {vpool.guid: {'success': True, 'changes': False}})
    self.assertListEqual(sorted(StorageRouterClient.node_config_recordings), [])
    expected_map = {'1': {'2': StorageDriver.DISTANCES.NEAR},
                    '2': {'1': StorageDriver.DISTANCES.NEAR}}
    configs = vpool.clusterregistry_client.get_node_configs()
    for config in configs:
        _validate_node_config(config, expected_map)

    # Validate some error paths
    # Move StorageRouter 1 to another domain and make its config raise on 'server_revision'
    domain = structure['domains'][2]
    junction = structure['storagerouters'][1].domains[0]
    junction.domain = domain
    junction.save()
    vpool_config_path = 'file://opt/OpenvStorage/config/framework.json?key=/ovs/vpools/{0}/hosts/1/config'.format(vpool.guid)
    StorageRouterClient.exceptions['server_revision'] = {vpool_config_path: Exception('ClusterNotReachableException')}
    StorageRouterClient.node_config_recordings = []
    result = StorageDriverController.cluster_registry_checkup()
    self.assertDictEqual(result, {vpool.guid: {'success': True, 'changes': True}})
    # Only node 2 could be reconfigured; node 1 was unreachable
    self.assertListEqual(sorted(StorageRouterClient.node_config_recordings), ['2'])
    # Nodes no longer share a domain, so distances become INFINITE
    expected_map = {'1': {'2': StorageDriver.DISTANCES.INFINITE},
                    '2': {'1': StorageDriver.DISTANCES.INFINITE}}
    configs = vpool.clusterregistry_client.get_node_configs()
    for config in configs:
        _validate_node_config(config, expected_map)
def prepare_mds_service(cls, storagerouter, vpool):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and vPool are already configured with a StorageDriver and that all model-wise
    configurations regarding both have been completed.
    :param storagerouter: StorageRouter on which the MDS service will be created
    :type storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: ovs.dal.hybrids.vpool.VPool
    :raises RuntimeError: vPool is not extended on StorageRouter
                          No ServiceType found for 'MetadataServer'
                          No free port is found for the new MDSService
                          No partition found on StorageRouter with DB role
    :return: Newly created junction service
    :rtype: ovs.dal.hybrids.j_mdsservice.MDSService
    """
    from ovs.lib.storagedriver import StorageDriverController  # Import here to prevent from circular imports

    cls._logger.info('StorageRouter {0} - vPool {1}: Preparing MDS junction service'.format(storagerouter.name, vpool.name))

    mds_service = MDSService()
    with volatile_mutex(name='prepare_mds_{0}'.format(storagerouter.guid), wait=30):
        # VALIDATIONS
        # Verify passed StorageRouter is part of the vPool
        storagerouter.invalidate_dynamics(['vpools_guids'])
        if vpool.guid not in storagerouter.vpools_guids:
            raise RuntimeError('StorageRouter {0} is not part of vPool {1}'.format(storagerouter.name, vpool.name))

        # Verify ServiceType existence
        mds_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
        if mds_service_type is None:
            raise RuntimeError('No ServiceType found with name {0}'.format(ServiceType.SERVICE_TYPES.MD_SERVER))

        # Retrieve occupied ports for current StorageRouter and max MDSService number for current vPool/StorageRouter combo
        service_number = -1
        occupied_ports = []
        for existing_service in mds_service_type.services:
            if existing_service.storagerouter_guid == storagerouter.guid:
                occupied_ports.extend(existing_service.ports)
                if existing_service.mds_service.vpool_guid == vpool.guid:
                    service_number = max(existing_service.mds_service.number, service_number)

        client = SSHClient(endpoint=storagerouter)
        mds_port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
        free_ports = System.get_free_ports(selected_range=mds_port_range, exclude=occupied_ports, amount=1, client=client)
        if len(free_ports) != 1:
            raise RuntimeError('Failed to find an available port on StorageRouter {0} within range {1}'.format(storagerouter.name, mds_port_range))

        # Partition check
        db_partition = None
        for disk in storagerouter.disks:
            for partition in disk.partitions:
                if DiskPartition.ROLES.DB in partition.roles:
                    db_partition = partition
                    break
        if db_partition is None:
            raise RuntimeError('Could not find DB partition on StorageRouter {0}'.format(storagerouter.name))

        # Verify StorageDriver configured
        storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
        if len(storagedrivers) != 1:
            raise RuntimeError('Expected to find a configured StorageDriver for vPool {0} on StorageRouter {1}'.format(vpool.name, storagerouter.name))

        # MODEL UPDATES
        # Service and MDS service
        service_number += 1
        cls._logger.info('StorageRouter {0} - vPool {1}: Adding junction service with number {2}'.format(storagerouter.name, vpool.name, service_number))

        service = Service()
        service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
        service.type = mds_service_type
        service.ports = free_ports
        service.storagerouter = storagerouter
        service.save()
        mds_service.vpool = vpool
        mds_service.number = service_number
        mds_service.service = service
        mds_service.save()

        # StorageDriver partitions
        cls._logger.info('StorageRouter {0} - vPool {1}: Adding StorageDriverPartition on partition with mount point {2}'.format(storagerouter.name, vpool.name, db_partition.mountpoint))
        storagedriver = storagedrivers[0]
        sdp = StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                                'role': DiskPartition.ROLES.DB,
                                                                                'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                'partition': db_partition,
                                                                                'mds_service': mds_service})

        # CONFIGURATIONS
        # Volumedriver
        # BUGFIX: use a dedicated loop variable here. The previous code reused 'service',
        # which meant the revert below could delete a PRE-EXISTING Service instead of the
        # one created above.
        mds_nodes = []
        for sd_partition in storagedriver.partitions:
            if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS and sd_partition.mds_service is not None:
                node_service = sd_partition.mds_service.service
                mds_nodes.append({'host': node_service.storagerouter.ip,
                                  'port': node_service.ports[0],
                                  'db_directory': '{0}/db'.format(sd_partition.path),
                                  'scratch_directory': '{0}/scratch'.format(sd_partition.path)})

        cls._logger.info('StorageRouter {0} - vPool {1}: Configuring StorageDriver with MDS nodes: {2}'.format(storagerouter.name, vpool.name, mds_nodes))
        # Generate the correct section in the StorageDriver's configuration
        try:
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
            storagedriver_config.save(client)
        except Exception:
            cls._logger.exception('StorageRouter {0} - vPool {1}: Configuring StorageDriver failed. Reverting model changes'.format(storagerouter.name, vpool.name))
            # Clean up model changes if error occurs
            sdp.delete()
            mds_service.delete()  # Must be removed before the service
            service.delete()
            # BUGFIX: re-raise so the caller sees the failure. The previous code fell
            # through to 'return mds_service', silently returning a deleted object.
            raise
    return mds_service
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and
    that all model-wise configuration regarding both is completed.
    :param storagerouter: Storagerouter on which MDS service will be created
    :param vpool: The vPool for which the MDS service will be created
    :param fresh_only: If True and no current mds services exist for this vpool on this storagerouter, a new 1 will be created
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :return: The newly created MDSService, or None when aborted because services already exist
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for existing_mds_service in vpool.mds_services:
        if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(existing_mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    occupied_ports = []
    for existing_service in mdsservice_type.services:
        if existing_service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(existing_service.ports)

    mds_port_range = client.config_read('ovs.ports.mds')
    free_ports = System.get_free_ports(selected_range=mds_port_range, exclude=occupied_ports, nr=1, client=client)
    if not free_ports:
        raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

    # 2. Partition check
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions
    from ovs.lib.storagedriver import StorageDriverController
    sdp = StorageDriverController.add_storagedriverpartition(storagedrivers[0], {'size': None,
                                                                                 'role': DiskPartition.ROLES.DB,
                                                                                 'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                 'partition': db_partition,
                                                                                 'mds_service': mds_service})

    # CONFIGURATIONS
    # 1. Volumedriver
    # BUGFIX: distinct loop variables are used on purpose here. The previous code reused
    # 'service' and 'mds_service' as loop variables, clobbering the newly created model
    # objects and thereby returning an arbitrary (possibly pre-existing) MDSService.
    # NOTE(review): every node reuses sdp.path (the partition of the NEW service) for its
    # db/scratch directories — looks suspicious for pre-existing services; verify intent.
    mds_nodes = []
    for existing_service in mdsservice_type.services:
        if existing_service.storagerouter_guid == storagerouter.guid:
            existing_mds_service = existing_service.mds_service
            if existing_mds_service.vpool_guid == vpool.guid:
                mds_nodes.append({'host': existing_service.storagerouter.ip,
                                  'port': existing_service.ports[0],
                                  'db_directory': sdp.path,
                                  'scratch_directory': sdp.path})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)
    return mds_service
def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster
    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None

    Note: this is an interactive orchestration entry point; on any validation or removal
    failure it logs the error and terminates the process via sys.exit(1).
    """
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.storagerouter import StorageRouterController
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
    Toolbox.log(
        logger=NodeRemovalController._logger,
        messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
    )

    ###############
    # VALIDATIONS #
    ###############
    try:
        # NOTE(review): strip() is called before the isinstance check, so a non-string would
        # raise AttributeError here instead of the ValueError below — confirm intended order
        node_ip = node_ip.strip()
        if not isinstance(node_ip, str):
            raise ValueError("Node IP must be a string")
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError("Invalid IP {0} specified".format(node_ip))

        storage_router_all = StorageRouterList.get_storagerouters()
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
        storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

        # The node must be known, must not be the last (master) node and must not be this node
        if node_ip not in storage_router_all_ips:
            raise ValueError(
                "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                    "\n - ".join(storage_router_all_ips), node_ip
                )
            )
        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")
        if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
            raise RuntimeError("Removing the only master node is not possible")
        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError(
                "The node to be removed cannot be identical to the node on which the removal is initiated"
            )

        # Try to reach every node; track offline ones and pick a reachable master to drive the removal
        Toolbox.log(
            logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
        )
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router, username="******")
                if client.run(["pwd"]):
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=" Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                    )
                    ip_client_map[storage_router.ip] = client
                    if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                        master_ip = storage_router.ip
            except UnableToConnectException:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=" Node with IP {0:<15} is unreachable".format(storage_router.ip),
                )
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError("Could not connect to any master node in the cluster")

        storage_router_to_remove.invalidate_dynamics("vdisks_guids")
        if (
            len(storage_router_to_remove.vdisks_guids) > 0
        ):  # vDisks are supposed to be moved away manually before removing a node
            raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

        # Removing this node must not wipe out the last memcached / rabbitmq endpoint
        internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
        internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
        memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
        rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
            raise RuntimeError(
                "Removal of provided nodes will result in a complete removal of the memcached service"
            )
        if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
            raise RuntimeError(
                "Removal of provided nodes will result in a complete removal of the messagequeue service"
            )
    except Exception as exception:
        Toolbox.log(
            logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
        )
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    # In interactive mode, ask for confirmation; non-interactive implies ASD manager removal
    interactive = silent != "--force-yes"
    remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
    if interactive is True:
        proceed = Interactive.ask_yesno(
            message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
            default_value=False,
        )
        if proceed is False:
            Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
            sys.exit(1)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if ServiceManager.has_service(name="asd-manager", client=client):
                remove_asd_manager = Interactive.ask_yesno(
                    message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                )

        if remove_asd_manager is True or storage_router_to_remove_online is False:
            # Give plugins a chance to veto the ASD removal
            for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                validation_output = function(storage_router_to_remove.ip)
                if validation_output["confirm"] is True:
                    if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                        remove_asd_manager = False
                        break

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="Starting removal of node {0} - {1}".format(
                storage_router_to_remove.name, storage_router_to_remove.ip
            ),
        )
        if storage_router_to_remove_online is False:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=" Marking all Storage Drivers served by Storage Router {0} as offline".format(
                    storage_router_to_remove.ip
                ),
            )
            StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(
            logger=NodeRemovalController._logger,
            # NOTE(review): .format() is called on a string without placeholders — no-op; verify intent
            messages=" Removing vPools from node".format(storage_router_to_remove.ip),
        )
        storage_routers_offline_guids = [
            sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
        ]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=" Removing vPool {0} from node".format(storage_driver.vpool.name),
            )
            StorageRouterController.remove_storagedriver(
                storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
            )

        # Demote if MASTER
        if storage_router_to_remove.node_type == "MASTER":
            NodeTypeController.demote_node(
                cluster_ip=storage_router_to_remove.ip,
                master_ip=master_ip,
                ip_client_map=ip_client_map,
                unique_id=storage_router_to_remove.machine_id,
                unconfigure_memcached=internal_memcached,
                unconfigure_rabbitmq=internal_rabbit_mq,
                offline_nodes=storage_routers_offline,
            )

        # Stop / remove services (only possible when the node itself is reachable)
        Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
        config_store = Configuration.get_store()
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            NodeRemovalController.remove_services(
                client=client,
                node_type=storage_router_to_remove.node_type.lower(),
                logger=NodeRemovalController._logger,
            )
            service = "watcher-config"
            if ServiceManager.has_service(service, client=client):
                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                ServiceManager.stop_service(service, client=client)
                ServiceManager.remove_service(service, client=client)

            if config_store == "etcd":
                from ovs.extensions.db.etcd.installer import EtcdInstaller

                if Configuration.get(key="/ovs/framework/external_config") is None:
                    Toolbox.log(logger=NodeRemovalController._logger, messages=" Removing Etcd cluster")
                    try:
                        EtcdInstaller.stop("config", client)
                        EtcdInstaller.remove("config", client)
                    except Exception as ex:
                        # Best-effort: log Etcd unconfigure failure but continue the removal
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=["\nFailed to unconfigure Etcd", ex],
                            loglevel="exception",
                        )

                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                EtcdInstaller.remove_proxy("config", client.ip)

        Toolbox.run_hooks(
            component="noderemoval",
            sub_component="remove",
            logger=NodeRemovalController._logger,
            cluster_ip=storage_router_to_remove.ip,
            complete_removal=remove_asd_manager,
        )

        # Clean up model: delete all DAL objects attached to the StorageRouter
        Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map,
            offline_node_ips=[node.ip for node in storage_routers_offline],
            logger=NodeRemovalController._logger,
        )

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if config_store == "arakoon":
                client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
            client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=["An unexpected error occurred:", str(exception)],
            boxed=True,
            loglevel="exception",
        )
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
            boxed=True,
            loglevel="error",
        )
        sys.exit(1)

    # Optionally remove the ASD manager on the removed node itself
    if remove_asd_manager is True:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system("asd-manager remove --force-yes")
    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver
    and that all model-wise configuration regarding both is completed.
    :param storagerouter: Storagerouter on which MDS service will be created
    :type storagerouter: StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: VPool
    :param fresh_only: If True and no current mds services exist for this vpool on this storagerouter, a new 1 will be created
    :type fresh_only: bool
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :type reload_config: bool
    :return: Newly created service
    :rtype: MDSService
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for existing_mds_service in vpool.mds_services:
        if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(existing_mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(service.ports)

    mds_port_range = Configuration.get(
        "/ovs/framework/hosts/{0}/ports|mds".format(System.get_my_machine_id(client))
    )
    free_ports = System.get_free_ports(selected_range=mds_port_range, exclude=occupied_ports, nr=1, client=client)
    if not free_ports:
        raise RuntimeError(
            "Failed to find an available port on storage router {0} within range {1}".format(
                storagerouter.name, mds_port_range
            )
        )

    # 2. Partition check: an MDS service requires a partition with the DB role
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError("Could not find DB partition on storage router {0}".format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError(
            "Expected to find a configured storagedriver for vpool {0} on storage router {1}".format(
                vpool.name, storagerouter.name
            )
        )
    storagedriver = storagedrivers[0]

    # MODEL UPDATES
    # 1. Service: model the generic Service entry and the MDS-specific junction
    service_number += 1
    service = Service()
    service.name = "metadataserver_{0}_{1}".format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions: claim a DB/MDS sub-role partition for the new service
    from ovs.lib.storagedriver import StorageDriverController
    StorageDriverController.add_storagedriverpartition(
        storagedriver,
        {
            "size": None,
            "role": DiskPartition.ROLES.DB,
            "sub_role": StorageDriverPartition.SUBROLE.MDS,
            "partition": db_partition,
            "mds_service": mds_service,
        },
    )

    # CONFIGURATIONS
    # 1. Volumedriver: collect all MDS nodes on this StorageRouter serving this vPool
    # NOTE: the loop variables are deliberately NOT named 'service'/'mds_service' -
    # reusing those names would clobber the newly created MDSService that must be
    # returned at the end of this function.
    mds_nodes = []
    for node_service in mdsservice_type.services:
        if node_service.storagerouter_guid == storagerouter.guid:
            node_mds_service = node_service.mds_service
            if node_mds_service is not None:
                if node_mds_service.vpool_guid == vpool.guid:
                    sdp = [
                        sd_partition
                        for sd_partition in node_mds_service.storagedriver_partitions
                        if sd_partition.role == DiskPartition.ROLES.DB
                        and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS
                    ][0]
                    mds_nodes.append(
                        {
                            "host": node_service.storagerouter.ip,
                            "port": node_service.ports[0],
                            "db_directory": sdp.path,
                            "scratch_directory": sdp.path,
                        }
                    )

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration("storagedriver", vpool.guid, storagedriver.storagedriver_id)
    storagedriver_config.load()
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)
    return mds_service
def add_vpool(cls, parameters):
    """
    Add a vPool to the machine this task is running on.
    Creates (or extends) the vPool model objects, validates the StorageRouter,
    models the StorageDriver and its partitions, performs the Arakoon and
    cluster-registry checkups and finally starts all required services.
    :param parameters: Parameters for vPool creation
    :type parameters: dict
    :return: None
    :rtype: NoneType
    """
    # TODO: Add logging
    cls._logger.debug('Adding vpool. Parameters: {}'.format(parameters))
    # VALIDATIONS
    if not isinstance(parameters, dict):
        raise ValueError(
            'Parameters passed to create a vPool should be of type dict')

    # Check StorageRouter existence
    storagerouter = StorageRouterList.get_by_ip(
        ip=parameters.get('storagerouter_ip'))
    if storagerouter is None:
        raise RuntimeError('Could not find StorageRouter')

    # Validate requested vPool configurations
    vp_installer = VPoolInstaller(name=parameters.get('vpool_name'))
    vp_installer.validate(storagerouter=storagerouter)

    # Validate requested StorageDriver configurations
    cls._logger.info(
        'vPool {0}: Validating StorageDriver configurations'.format(
            vp_installer.name))
    sd_installer = StorageDriverInstaller(
        vp_installer=vp_installer,
        configurations={
            'storage_ip': parameters.get('storage_ip'),
            'caching_info': parameters.get('caching_info'),
            'backend_info': {
                'main': parameters.get('backend_info'),
                StorageDriverConfiguration.CACHE_BLOCK: parameters.get('backend_info_bc'),
                StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('backend_info_fc')
            },
            'connection_info': {
                'main': parameters.get('connection_info'),
                StorageDriverConfiguration.CACHE_BLOCK: parameters.get('connection_info_bc'),
                StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('connection_info_fc')
            },
            'sd_configuration': parameters.get('config_params')
        })

    # Mutex guarding concurrent partition modeling on the same StorageRouter
    partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format(
        storagerouter.guid))
    try:
        # VPOOL CREATION
        # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state)
        if vp_installer.is_new is True:
            vp_installer.create(rdma_enabled=sd_installer.rdma_enabled)
            vp_installer.configure_mds(
                config=parameters.get('mds_config_params', {}))
        else:
            vp_installer.update_status(status=VPool.STATUSES.EXTENDING)

        # ADDITIONAL VALIDATIONS
        # Check StorageRouter connectivity
        cls._logger.info(
            'vPool {0}: Validating StorageRouter connectivity'.format(
                vp_installer.name))
        linked_storagerouters = [storagerouter]
        if vp_installer.is_new is False:
            # When extending, every StorageRouter already serving the vPool must be reachable too
            linked_storagerouters += [
                sd.storagerouter for sd in vp_installer.vpool.storagedrivers
            ]

        sr_client_map = SSHClient.get_clients(
            endpoints=linked_storagerouters, user_names=['ovs', 'root'])
        offline_nodes = sr_client_map.pop('offline')
        if storagerouter in offline_nodes:
            raise RuntimeError(
                'Node on which the vPool is being {0} is not reachable'.
                format('created' if vp_installer.is_new is True else
                       'extended'))

        sr_installer = StorageRouterInstaller(
            root_client=sr_client_map[storagerouter]['root'],
            sd_installer=sd_installer,
            vp_installer=vp_installer,
            storagerouter=storagerouter)

        # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context
        partitions_mutex.acquire(wait=60)
        sr_installer.partition_info = StorageRouterController.get_partition_info(
            storagerouter_guid=storagerouter.guid)
        sr_installer.validate_vpool_extendable()
        sr_installer.validate_global_write_buffer(
            requested_size=parameters.get('writecache_size', 0))
        sr_installer.validate_local_cache_size(
            requested_proxies=parameters.get('parallelism', {}).get(
                'proxies', 2))

        # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS
        sd_installer.create()
        sd_installer.create_partitions()
        partitions_mutex.release()

        vp_installer.refresh_metadata()
    except Exception:
        cls._logger.exception(
            'Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}'
            .format(vp_installer.name, storagerouter.name))
        # Release the mutex (safe even if never acquired) and undo the vPool creation/extension
        partitions_mutex.release()
        vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
        raise

    # Arakoon setup
    # Retry the voldrv Arakoon checkup for at most 300 seconds before giving up
    counter = 0
    while counter < 300:
        try:
            if StorageDriverController.manual_voldrv_arakoon_checkup(
            ) is True:
                break
        except Exception:
            cls._logger.exception(
                'Arakoon checkup for voldrv cluster failed')
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise
        counter += 1
        time.sleep(1)
        if counter == 300:
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise RuntimeError(
                'Arakoon checkup for the StorageDriver cluster could not be started'
            )

    # Cluster registry
    try:
        vp_installer.configure_cluster_registry(allow_raise=True)
    except Exception:
        # A brand-new vPool can be fully reverted; an extended one is flagged FAILURE instead
        if vp_installer.is_new is True:
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
        else:
            vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE)
        raise

    try:
        sd_installer.setup_proxy_configs()
        sd_installer.configure_storagedriver_service()
        DiskController.sync_with_reality(storagerouter.guid)
        MDSServiceController.prepare_mds_service(
            storagerouter=storagerouter, vpool=vp_installer.vpool)

        # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver)
        vp_installer.vpool.invalidate_dynamics('configuration')
        if vp_installer.mds_safety is not None and vp_installer.vpool.configuration[
                'mds_config']['mds_safety'] != vp_installer.mds_safety:
            Configuration.set(
                key='/ovs/vpools/{0}/mds_config|mds_safety'.format(
                    vp_installer.vpool.guid),
                value=vp_installer.mds_safety)

        sd_installer.start_services(
        )  # Create and start watcher volumedriver, DTL, proxies and StorageDriver services

        # Post creation/extension checkups
        mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(
            vpool=vp_installer.vpool, offline_nodes=offline_nodes)
        for sr, clients in sr_client_map.iteritems():
            for current_storagedriver in [
                    sd for sd in sr.storagedrivers
                    if sd.vpool_guid == vp_installer.vpool.guid
            ]:
                storagedriver_config = StorageDriverConfiguration(
                    vpool_guid=vp_installer.vpool.guid,
                    storagedriver_id=current_storagedriver.storagedriver_id
                )
                if storagedriver_config.config_missing is False:
                    # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem
                    # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them
                    storagedriver_config.configure_filesystem(
                        fs_metadata_backend_mds_nodes=mds_config_set[
                            sr.guid])
                    storagedriver_config.save(client=clients['ovs'])

        # Everything's reconfigured, refresh new cluster configuration
        for current_storagedriver in vp_installer.vpool.storagedrivers:
            if current_storagedriver.storagerouter not in sr_client_map:
                continue
            vp_installer.vpool.storagedriver_client.update_cluster_node_configs(
                str(current_storagedriver.storagedriver_id),
                req_timeout_secs=10)
    except Exception:
        cls._logger.exception('vPool {0}: Creation failed'.format(
            vp_installer.name))
        vp_installer.update_status(status=VPool.STATUSES.FAILURE)
        raise

    # When a node is offline, we can run into errors, but also when 1 or more volumes are not running
    # Scheduled tasks below, so don't really care whether they succeed or not
    try:
        VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                    ensure_single_timeout=600)
    except:
        pass

    for vdisk in vp_installer.vpool.vdisks:
        try:
            MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
        except:
            pass
    vp_installer.update_status(status=VPool.STATUSES.RUNNING)
    cls._logger.info('Add vPool {0} ended successfully'.format(
        vp_installer.name))
def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
    """
    Promotes or demotes the local node
    :param node_action: Demote or promote
    :type node_action: str
    :param cluster_ip: IP of node to promote or demote
    :type cluster_ip: str
    :param execute_rollback: In case of failure revert the changes made
    :type execute_rollback: bool
    :return: None
    """
    if node_action not in ('promote', 'demote'):
        raise ValueError('Nodes can only be promoted or demoted')

    Toolbox.log(logger=NodeTypeController._logger, messages='Open vStorage Setup - {0}'.format(node_action.capitalize()), boxed=True)
    try:
        Toolbox.log(logger=NodeTypeController._logger, messages='Collecting information', title=True)

        machine_id = System.get_my_machine_id()
        if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id)) is False:
            raise RuntimeError('No local OVS setup found.')

        if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
            raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))

        if cluster_ip:
            # A remote node was targeted: resolve its machine id over SSH
            client = SSHClient(endpoint=cluster_ip)
            machine_id = System.get_my_machine_id(client)

        node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
        if node_action == 'promote' and node_type == 'MASTER':
            raise RuntimeError('This node is already master.')
        elif node_action == 'demote' and node_type == 'EXTRA':
            raise RuntimeError('This node should be a master.')
        elif node_type not in ['MASTER', 'EXTRA']:
            raise RuntimeError('This node is not correctly configured.')

        master_ip = None
        offline_nodes = []

        online = True
        target_client = None
        if node_action == 'demote' and cluster_ip:  # Demote an offline node
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.lib.storagedriver import StorageDriverController

            ip = cluster_ip
            unique_id = None
            ip_client_map = {}
            # Try to reach every StorageRouter; the target must turn out to be offline
            for storage_router in StorageRouterList.get_storagerouters():
                try:
                    client = SSHClient(storage_router.ip, username='******')
                    if storage_router.node_type == 'MASTER':
                        master_ip = storage_router.ip
                    ip_client_map[storage_router.ip] = client
                except UnableToConnectException:
                    if storage_router.ip == cluster_ip:
                        online = False
                        unique_id = storage_router.machine_id
                        StorageDriverController.mark_offline(storagerouter_guid=storage_router.guid)
                    offline_nodes.append(storage_router)
            if online is True:
                raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
            if master_ip is None:
                raise RuntimeError('Failed to retrieve another responsive MASTER node')

        else:
            # Promote/demote the local node: gather cluster info via the local host
            target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=NodeTypeController._logger)
            target_client = SSHClient('127.0.0.1', username='******', password=target_password)

            unique_id = System.get_my_machine_id(target_client)
            ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

            storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
            node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues()]
            master_node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues() if sr_info['type'] == 'master' and sr_info['ip'] != ip]
            if len(master_node_ips) == 0:
                if node_action == 'promote':
                    raise RuntimeError('No master node could be found')
                else:
                    raise RuntimeError('It is not possible to remove the only master')

            master_ip = master_node_ips[0]
            ip_client_map = dict((node_ip, SSHClient(node_ip, username='******')) for node_ip in node_ips)

        if node_action == 'demote':
            # Demoting a node hosting a single-node internal Arakoon cluster would destroy that cluster
            for cluster_name in Configuration.list('/ovs/arakoon'):
                config = ArakoonClusterConfig(cluster_name, False)
                config.load_config()
                arakoon_client = ArakoonInstaller.build_client(config)
                metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                if len(config.nodes) == 1 and config.nodes[0].ip == ip and metadata.get('internal') is True:
                    raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')

        configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
        configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
        if node_action == 'promote':
            try:
                NodeTypeController.promote_node(cluster_ip=ip,
                                                master_ip=master_ip,
                                                ip_client_map=ip_client_map,
                                                unique_id=unique_id,
                                                configure_memcached=configure_memcached,
                                                configure_rabbitmq=configure_rabbitmq)
            except Exception:
                # On failure either roll back immediately or leave a marker file for a later manual rollback
                if execute_rollback is True:
                    NodeTypeController.demote_node(cluster_ip=ip,
                                                   master_ip=master_ip,
                                                   ip_client_map=ip_client_map,
                                                   unique_id=unique_id,
                                                   unconfigure_memcached=configure_memcached,
                                                   unconfigure_rabbitmq=configure_rabbitmq,
                                                   offline_nodes=offline_nodes)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'demote')
                raise
        else:
            try:
                NodeTypeController.demote_node(cluster_ip=ip,
                                               master_ip=master_ip,
                                               ip_client_map=ip_client_map,
                                               unique_id=unique_id,
                                               unconfigure_memcached=configure_memcached,
                                               unconfigure_rabbitmq=configure_rabbitmq,
                                               offline_nodes=offline_nodes)
            except Exception:
                # On failure either roll back immediately or leave a marker file for a later manual rollback
                if execute_rollback is True:
                    NodeTypeController.promote_node(cluster_ip=ip,
                                                    master_ip=master_ip,
                                                    ip_client_map=ip_client_map,
                                                    unique_id=unique_id,
                                                    configure_memcached=configure_memcached,
                                                    configure_rabbitmq=configure_rabbitmq)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'promote')
                raise

        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages='{0} complete.'.format(node_action.capitalize()), boxed=True)
    except Exception as exception:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception')
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.', boxed=True, loglevel='error')
        sys.exit(1)
def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster.
    Runs three phases: validations (connectivity, cluster invariants),
    interactive confirmations (unless silent), and the actual removal
    (vPools, demotion, services, model cleanup). Exits the process on failure.
    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.vpool import VPoolController

    Toolbox.log(logger=NodeRemovalController._logger, messages='Remove node', boxed=True)
    Toolbox.log(
        logger=NodeRemovalController._logger,
        messages=
        'WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n'
    )
    service_manager = ServiceFactory.get_manager()

    ###############
    # VALIDATIONS #
    ###############
    try:
        # Type check BEFORE strip(): calling .strip() on a non-string would raise
        # AttributeError instead of the intended ValueError
        if not isinstance(node_ip, str):
            raise ValueError('Node IP must be a string')
        node_ip = node_ip.strip()
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError('Invalid IP {0} specified'.format(node_ip))

        storage_router_all = sorted(StorageRouterList.get_storagerouters(),
                                    key=lambda k: k.name)
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set(
            [storage_router.ip for storage_router in storage_router_all])
        storage_router_master_ips = set([
            storage_router.ip for storage_router in storage_router_masters
        ])
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
        offline_reasons = {}
        if node_ip not in storage_router_all_ips:
            raise ValueError(
                'Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'
                .format('\n - '.join(storage_router_all_ips), node_ip))

        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")

        if node_ip in storage_router_master_ips and len(
                storage_router_master_ips) == 1:
            raise RuntimeError(
                "Removing the only master node is not possible")

        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError(
                'The node to be removed cannot be identical to the node on which the removal is initiated'
            )

        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages='Creating SSH connections to remaining master nodes')
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router,
                                   username='******',
                                   timeout=10)
            except (UnableToConnectException, NotAuthenticatedException,
                    TimeOutException) as ex:
                # Record why this node is unreachable; an unreachable target node
                # switches the removal into "offline node" mode
                if isinstance(ex, UnableToConnectException):
                    msg = 'Unable to connect'
                elif isinstance(ex, NotAuthenticatedException):
                    msg = 'Could not authenticate'
                elif isinstance(ex, TimeOutException):
                    msg = 'Connection timed out'
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=' * Node with IP {0:<15}- {1}'.format(
                        storage_router.ip, msg))
                offline_reasons[storage_router.ip] = msg
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False
                continue

            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=' * Node with IP {0:<15}- Successfully connected'
                .format(storage_router.ip))
            ip_client_map[storage_router.ip] = client
            if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                master_ip = storage_router.ip

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError(
                'Could not connect to any master node in the cluster')

        storage_router_to_remove.invalidate_dynamics('vdisks_guids')
        if len(
                storage_router_to_remove.vdisks_guids
        ) > 0:  # vDisks are supposed to be moved away manually before removing a node
            raise RuntimeError(
                "Still vDisks attached to Storage Router {0}".format(
                    storage_router_to_remove.name))

        # Removing this node may not leave memcached/rabbitmq without any endpoint
        internal_memcached = Toolbox.is_service_internally_managed(
            service='memcached')
        internal_rabbit_mq = Toolbox.is_service_internally_managed(
            service='rabbitmq')
        memcached_endpoints = Configuration.get(
            key='/ovs/framework/memcache|endpoints')
        rabbit_mq_endpoints = Configuration.get(
            key='/ovs/framework/messagequeue|endpoints')
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints
               ) == 0 and internal_memcached is True:
            raise RuntimeError(
                'Removal of provided nodes will result in a complete removal of the memcached service'
            )
        if len(copy_rabbit_mq_endpoints
               ) == 0 and internal_rabbit_mq is True:
            raise RuntimeError(
                'Removal of provided nodes will result in a complete removal of the messagequeue service'
            )

        Toolbox.run_hooks(component='noderemoval',
                          sub_component='validate_removal',
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'Removal has been aborted during the validation step. No changes have been applied.',
            boxed=True,
            loglevel='warning')
        sys.exit(1)
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=[str(exception)],
                    boxed=True,
                    loglevel='exception')
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    try:
        interactive = silent != '--force-yes'
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            if len(storage_routers_offline) > 0:
                # Offline nodes cannot be cleaned up fully; make the operator acknowledge this
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=
                    'Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.'
                )
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages='Offline nodes: {0}'.format(''.join(
                        ('\n * {0:<15}- {1}.'.format(ip, message)
                         for ip, message in offline_reasons.iteritems()))))
                valid_node_info = Interactive.ask_yesno(
                    message=
                    'Continue the removal with these being presumably offline?',
                    default_value=False)
                if valid_node_info is False:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=
                        'Please validate the state of the nodes before removing.',
                        title=True)
                    sys.exit(1)

            proceed = Interactive.ask_yesno(
                message='Are you sure you want to remove node {0}?'.format(
                    storage_router_to_remove.name),
                default_value=False)
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='Abort removal',
                            title=True)
                sys.exit(1)

            remove_asd_manager = True
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                if service_manager.has_service(name='asd-manager',
                                               client=client):
                    remove_asd_manager = Interactive.ask_yesno(
                        message=
                        'Do you also want to remove the ASD manager and related ASDs?',
                        default_value=False)

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                # Give registered hooks a chance to veto the ASD removal
                for fct in Toolbox.fetch_hooks('noderemoval',
                                               'validate_asd_removal'):
                    validation_output = fct(storage_router_to_remove.ip)
                    if validation_output['confirm'] is True:
                        if Interactive.ask_yesno(
                                message=validation_output['question'],
                                default_value=False) is False:
                            remove_asd_manager = False
                            break
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'Removal has been aborted during the confirmation step. No changes have been applied.',
            boxed=True,
            loglevel='warning')
        sys.exit(1)
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=[str(exception)],
                    boxed=True,
                    loglevel='exception')
        sys.exit(1)

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Starting removal of node {0} - {1}'.format(
                        storage_router_to_remove.name,
                        storage_router_to_remove.ip))
        if storage_router_to_remove_online is False:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                ' Marking all Storage Drivers served by Storage Router {0} as offline'
                .format(storage_router_to_remove.ip))
            StorageDriverController.mark_offline(
                storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=' Removing vPools from node'.format(
                        storage_router_to_remove.ip))
        storage_routers_offline_guids = [
            sr.guid for sr in storage_routers_offline
            if sr.guid != storage_router_to_remove.guid
        ]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=' Removing vPool {0} from node'.format(
                            storage_driver.vpool.name))
            VPoolController.shrink_vpool(
                storagedriver_guid=storage_driver.guid,
                offline_storage_router_guids=storage_routers_offline_guids)

        # Demote if MASTER
        if storage_router_to_remove.node_type == 'MASTER':
            NodeTypeController.demote_node(
                cluster_ip=storage_router_to_remove.ip,
                master_ip=master_ip,
                ip_client_map=ip_client_map,
                unique_id=storage_router_to_remove.machine_id,
                unconfigure_memcached=internal_memcached,
                unconfigure_rabbitmq=internal_rabbit_mq,
                offline_nodes=storage_routers_offline)

        # Stop / remove services
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Stopping and removing services')
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove,
                               username='******')
            NodeRemovalController.remove_services(
                client=client,
                node_type=storage_router_to_remove.node_type.lower(),
                logger=NodeRemovalController._logger)
            service = 'watcher-config'
            if service_manager.has_service(service, client=client):
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages='Removing service {0}'.format(service))
                service_manager.stop_service(service, client=client)
                service_manager.remove_service(service, client=client)

        Toolbox.run_hooks(component='noderemoval',
                          sub_component='remove',
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip,
                          complete_removal=remove_asd_manager)

        # Clean up model
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Removing node from model')
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete('/ovs/framework/hosts/{0}'.format(
            storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map,
            offline_node_ips=[node.ip for node in storage_routers_offline],
            logger=NodeRemovalController._logger)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove,
                               username='******')
            client.file_delete(filenames=[CACC_LOCATION])
            client.file_delete(filenames=[CONFIG_STORE_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Successfully removed node\n')
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=['An unexpected error occurred:', str(exception)],
            boxed=True,
            loglevel='exception')
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
            boxed=True,
            loglevel='error')
        sys.exit(1)

    if remove_asd_manager is True and storage_router_to_remove_online is True:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='\nRemoving ASD Manager')
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system('asd-manager remove --force-yes')
    Toolbox.log(logger=NodeRemovalController._logger,
                messages='Remove nodes finished',
                title=True)
def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
    """
    Promotes or demotes the local node
    On any failure the process logs the error and exits the interpreter with status 1
    :param node_action: Demote or promote
    :type node_action: str
    :param cluster_ip: IP of node to promote or demote
    :type cluster_ip: str
    :param execute_rollback: In case of failure revert the changes made
    :type execute_rollback: bool
    :return: None
    """
    # Fail fast on an unknown action before doing any work
    if node_action not in ('promote', 'demote'):
        raise ValueError('Nodes can only be promoted or demoted')
    Toolbox.log(logger=NodeTypeController._logger,
                messages='Open vStorage Setup - {0}'.format(node_action.capitalize()),
                boxed=True)
    try:
        Toolbox.log(logger=NodeTypeController._logger, messages='Collecting information', title=True)

        machine_id = System.get_my_machine_id()
        if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id)) is False:
            raise RuntimeError('No local OVS setup found.')

        if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
            raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))

        if cluster_ip:
            # A remote node was targeted: resolve its machine id over SSH instead
            # of using the local one collected above
            client = SSHClient(endpoint=cluster_ip)
            machine_id = System.get_my_machine_id(client)

        node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
        # Reject no-op or invalid transitions based on the node's current type
        if node_action == 'promote' and node_type == 'MASTER':
            raise RuntimeError('This node is already master.')
        elif node_action == 'demote' and node_type == 'EXTRA':
            raise RuntimeError('This node should be a master.')
        elif node_type not in ['MASTER', 'EXTRA']:
            raise RuntimeError('This node is not correctly configured.')

        master_ip = None
        offline_nodes = []

        online = True
        target_client = None
        if node_action == 'demote' and cluster_ip:  # Demote an offline node
            # Local imports: these are only needed on this code path
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.lib.storagedriver import StorageDriverController

            ip = cluster_ip
            unique_id = None
            ip_client_map = {}
            # Probe every storagerouter; reachable ones are kept in ip_client_map,
            # unreachable ones are collected in offline_nodes
            for storage_router in StorageRouterList.get_storagerouters():
                try:
                    client = SSHClient(storage_router.ip, username='******')
                    if storage_router.node_type == 'MASTER':
                        master_ip = storage_router.ip
                    ip_client_map[storage_router.ip] = client
                except UnableToConnectException:
                    if storage_router.ip == cluster_ip:
                        # The node to demote is indeed unreachable: record its id
                        # and mark it offline on the storagedriver side
                        online = False
                        unique_id = storage_router.machine_id
                        StorageDriverController.mark_offline(storagerouter_guid=storage_router.guid)
                    offline_nodes.append(storage_router)
            if online is True:
                raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
            if master_ip is None:
                raise RuntimeError('Failed to retrieve another responsive MASTER node')

        else:
            # Acting on the local node: authenticate against localhost first
            target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=NodeTypeController._logger)
            target_client = SSHClient('127.0.0.1', username='******', password=target_password)

            unique_id = System.get_my_machine_id(target_client)
            ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

            storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
            node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues()]
            # All master nodes except the node currently being acted upon
            master_node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues() if sr_info['type'] == 'master' and sr_info['ip'] != ip]
            if len(master_node_ips) == 0:
                if node_action == 'promote':
                    raise RuntimeError('No master node could be found')
                else:
                    raise RuntimeError('It is not possible to remove the only master')

            master_ip = master_node_ips[0]
            ip_client_map = dict((node_ip, SSHClient(node_ip, username='******')) for node_ip in node_ips)

        if node_action == 'demote':
            # A single-node, internally managed Arakoon cluster living on this
            # node would lose its only member -> block the demote
            for cluster_name in Configuration.list('/ovs/arakoon'):
                config = ArakoonClusterConfig(cluster_id=cluster_name)
                arakoon_client = ArakoonInstaller.build_client(config)
                metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                if len(config.nodes) == 1 and config.nodes[0].ip == ip and metadata.get('internal') is True:
                    raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')

        configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
        configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
        if node_action == 'promote':
            try:
                NodeTypeController.promote_node(cluster_ip=ip,
                                                master_ip=master_ip,
                                                ip_client_map=ip_client_map,
                                                unique_id=unique_id,
                                                configure_memcached=configure_memcached,
                                                configure_rabbitmq=configure_rabbitmq)
            except Exception:
                # On failure either roll back immediately by demoting again, or
                # drop a marker file so the rollback can be executed later;
                # the exception is re-raised either way
                if execute_rollback is True:
                    NodeTypeController.demote_node(cluster_ip=ip,
                                                   master_ip=master_ip,
                                                   ip_client_map=ip_client_map,
                                                   unique_id=unique_id,
                                                   unconfigure_memcached=configure_memcached,
                                                   unconfigure_rabbitmq=configure_rabbitmq,
                                                   offline_nodes=offline_nodes)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'demote')
                raise
        else:
            try:
                NodeTypeController.demote_node(cluster_ip=ip,
                                               master_ip=master_ip,
                                               ip_client_map=ip_client_map,
                                               unique_id=unique_id,
                                               unconfigure_memcached=configure_memcached,
                                               unconfigure_rabbitmq=configure_rabbitmq,
                                               offline_nodes=offline_nodes)
            except Exception:
                # Mirror image of the promote failure handling above
                if execute_rollback is True:
                    NodeTypeController.promote_node(cluster_ip=ip,
                                                    master_ip=master_ip,
                                                    ip_client_map=ip_client_map,
                                                    unique_id=unique_id,
                                                    configure_memcached=configure_memcached,
                                                    configure_rabbitmq=configure_rabbitmq)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'promote')
                raise
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='{0} complete.'.format(node_action.capitalize()),
                    boxed=True)
    except Exception as exception:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger,
                    messages=['An unexpected error occurred:', str(exception)],
                    boxed=True,
                    loglevel='exception')
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                    boxed=True,
                    loglevel='error')
        sys.exit(1)