def migrate(previous_version, master_ips=None, extra_ips=None):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
        - 1 > 2
        - 2 > 3
    :param previous_version: The previous version from which to start the migration.
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    :return: The version this migrator has brought the environment to
    """
    logger = LogHandler.get('extensions', name='migration')
    working_version = previous_version

    # Version 1 introduced:
    # - Flexible SSD layout
    if working_version < 1:
        try:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
        except Exception:  # Fixed: was a bare 'except:', which also swallows SystemExit/KeyboardInterrupt
            logger.exception('Error migrating to version 1')
        working_version = 1

    # Version 2 introduced:
    # - Registration
    if working_version < 2:
        try:
            import time
            from ovs.extensions.generic.configuration import Configuration
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())
        except Exception:
            logger.exception('Error migrating to version 2')
        working_version = 2

    # Version 3 introduced:
    # - New arakoon clients
    if working_version < 3:
        try:
            # Reload the module first so the freshly installed version is used instead of a cached one
            from ovs.extensions.db.arakoon import ArakoonInstaller
            reload(ArakoonInstaller)
            from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.generic.configuration import Configuration
            if master_ips is not None:
                for ip in master_ips:
                    client = SSHClient(ip)
                    if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                        for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            try:
                                ArakoonInstaller.deploy_cluster(cluster_name, ip)
                            except Exception:  # Best effort per cluster; one failure must not abort the others
                                pass
            if Configuration.exists('ovs.core.storage.persistent'):
                Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
        except Exception:
            logger.exception('Error migrating to version 3')
        working_version = 3

    # Version 4 introduced:
    # - Etcd
    if working_version < 4:
        try:
            import os
            import json
            from ConfigParser import RawConfigParser
            from ovs.extensions.db.etcd import installer
            reload(installer)
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            from ovs.extensions.db.etcd.configuration import EtcdConfiguration
            from ovs.extensions.generic.system import System
            host_id = System.get_my_machine_id()
            etcd_migrate = False
            if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                # A local etcd 'config' cluster already exists, only the data migration remains
                etcd_migrate = True
            else:
                if master_ips is not None and extra_ips is not None:
                    # Find an existing 'config' cluster on any known node
                    cluster_ip = None
                    for ip in master_ips + extra_ips:
                        if EtcdInstaller.has_cluster(ip, 'config'):
                            cluster_ip = ip
                            break
                    node_ip = None
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            node_ip = config['grid']['ip']
                    if node_ip is not None:
                        if cluster_ip is None:
                            # No cluster anywhere yet: bootstrap one on this node
                            EtcdInstaller.create_cluster('config', node_ip)
                            EtcdConfiguration.initialize()
                            EtcdConfiguration.initialize_host(host_id)
                        else:
                            # Join the existing cluster
                            EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                            EtcdConfiguration.initialize_host(host_id)
                        etcd_migrate = True
            if etcd_migrate is True:
                # Migrating configuration files
                path = '/opt/OpenvStorage/config/ovs.json'
                if os.path.exists(path):
                    with open(path) as config_file:
                        config = json.load(config_file)
                        EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                        if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                            EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                        else:
                            # Keep the oldest known install time across the cluster
                            EtcdConfiguration.set('/ovs/framework/install_time', min(EtcdConfiguration.get('/ovs/framework/install_time'), config['core']['install_time']))
                        EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                        EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                        EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                        EtcdConfiguration.set('/ovs/framework/paths', {'cfgdir': config['core']['cfgdir'],
                                                                       'basedir': config['core']['basedir'],
                                                                       'ovsdb': config['core']['ovsdb']})
                        EtcdConfiguration.set('/ovs/framework/support', {'enablesupport': config['support']['enablesupport'],
                                                                         'enabled': config['support']['enabled'],
                                                                         'interval': config['support']['interval']})
                        EtcdConfiguration.set('/ovs/framework/storagedriver', {'mds_safety': config['storagedriver']['mds']['safety'],
                                                                               'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                                                               'mds_maxload': config['storagedriver']['mds']['maxload']})
                        EtcdConfiguration.set('/ovs/framework/webapps', {'html_endpoint': config['webapps']['html_endpoint'],
                                                                         'oauth2': config['webapps']['oauth2']})
                        EtcdConfiguration.set('/ovs/framework/messagequeue', {'endpoints': [],
                                                                              'protocol': config['core']['broker']['protocol'],
                                                                              'user': config['core']['broker']['login'],
                                                                              'port': config['core']['broker']['port'],
                                                                              'password': config['core']['broker']['password'],
                                                                              'queues': config['core']['broker']['queues']})
                        # host_key keeps an inner '{0}' placeholder for the per-host sub-paths below
                        host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                        EtcdConfiguration.set(host_key.format('/storagedriver'), {'rsp': config['storagedriver']['rsp'],
                                                                                  'vmware_mode': config['storagedriver']['vmware_mode']})
                        EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                        EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                        EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                        EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                        EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])
                path = '{0}/memcacheclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip()
                             for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/memcache|endpoints', nodes)
                    os.remove(path)
                path = '{0}/rabbitmqclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip()
                             for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/messagequeue|endpoints', nodes)
                    os.remove(path)
                # Migrate arakoon configuration files
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                from ovs.extensions.generic.sshclient import SSHClient
                if master_ips is not None:
                    config_dir = '/opt/OpenvStorage/config/arakoon/'
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(config_dir):
                            for cluster_name in client.dir_list(config_dir):
                                try:
                                    with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                        EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name),
                                                              config_file.read(),
                                                              raw=True)
                                    ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except Exception:
                                    logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                            client.dir_delete(config_dir)
        except Exception:
            logger.exception('Error migrating to version 4')
        working_version = 4

    return working_version
def migrate(previous_version, master_ips=None, extra_ips=None):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
        - 1 > 2
        - 2 > 3
    :param previous_version: The previous version from which to start the migration.
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    :return: The version this migrator has brought the environment to
    """
    logger = LogHandler.get('extensions', name='albamigration')
    working_version = previous_version

    # Version 1 introduced:
    # - Etcd
    if working_version < 1:
        try:
            import os
            import json
            from ovs.extensions.db.etcd import installer
            reload(installer)
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            from ovs.extensions.db.etcd.configuration import EtcdConfiguration
            from ovs.extensions.generic.system import System
            host_id = System.get_my_machine_id()
            etcd_migrate = False
            if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                # A local etcd 'config' cluster already exists, only the data migration remains
                etcd_migrate = True
            else:
                if master_ips is not None and extra_ips is not None:
                    # Find an existing 'config' cluster on any known node
                    cluster_ip = None
                    for ip in master_ips + extra_ips:
                        if EtcdInstaller.has_cluster(ip, 'config'):
                            cluster_ip = ip
                            break
                    node_ip = None
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            node_ip = config['grid']['ip']
                    if node_ip is not None:
                        if cluster_ip is None:
                            # No cluster anywhere yet: bootstrap one on this node
                            EtcdInstaller.create_cluster('config', node_ip)
                            EtcdConfiguration.initialize()
                            EtcdConfiguration.initialize_host(host_id)
                        else:
                            # Join the existing cluster
                            EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                            EtcdConfiguration.initialize_host(host_id)
                        etcd_migrate = True
            if etcd_migrate is True:
                # At this point, there is an etcd cluster. Migrating alba.json
                path = '/opt/OpenvStorage/config/alba.json'
                if os.path.exists(path):
                    with open(path) as config_file:
                        config = json.load(config_file)
                        EtcdConfiguration.set('/ovs/framework/plugins/alba/config', config)
                    os.remove(path)
                EtcdConfiguration.set('/ovs/alba/backends/global_gui_error_interval', 300)
        except Exception:  # Fixed: was a bare 'except:', which also swallows SystemExit/KeyboardInterrupt
            logger.exception('Error migrating to version 1')
        working_version = 1

    return working_version
def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster
    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    """
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.storagerouter import StorageRouterController
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
    Toolbox.log(
        logger=NodeRemovalController._logger,
        messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
    )

    ###############
    # VALIDATIONS #
    ###############
    try:
        # Fixed: type-check before calling .strip(), otherwise a non-string input
        # raised AttributeError instead of the intended ValueError.
        if not isinstance(node_ip, str):
            raise ValueError("Node IP must be a string")
        node_ip = node_ip.strip()
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError("Invalid IP {0} specified".format(node_ip))

        storage_router_all = StorageRouterList.get_storagerouters()
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
        storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

        if node_ip not in storage_router_all_ips:
            raise ValueError(
                "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                    "\n - ".join(storage_router_all_ips), node_ip
                )
            )
        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")
        if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
            raise RuntimeError("Removing the only master node is not possible")
        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError(
                "The node to be removed cannot be identical to the node on which the removal is initiated"
            )

        Toolbox.log(
            logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
        )
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router, username="******")
                if client.run(["pwd"]):  # Cheap command to probe connectivity
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages="  Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                    )
                    ip_client_map[storage_router.ip] = client
                    if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                        master_ip = storage_router.ip
            except UnableToConnectException:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="  Node with IP {0:<15} is unreachable".format(storage_router.ip),
                )
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError("Could not connect to any master node in the cluster")

        storage_router_to_remove.invalidate_dynamics("vdisks_guids")
        if (
            len(storage_router_to_remove.vdisks_guids) > 0
        ):  # vDisks are supposed to be moved away manually before removing a node
            raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

        internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
        internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
        memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
        rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
        # Simulate the removal on copies to verify the cluster keeps at least one endpoint of each service
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
            raise RuntimeError(
                "Removal of provided nodes will result in a complete removal of the memcached service"
            )
        if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
            raise RuntimeError(
                "Removal of provided nodes will result in a complete removal of the messagequeue service"
            )
    except Exception as exception:
        Toolbox.log(
            logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
        )
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    interactive = silent != "--force-yes"
    remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
    if interactive is True:
        proceed = Interactive.ask_yesno(
            message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
            default_value=False,
        )
        if proceed is False:
            Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
            sys.exit(1)
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if ServiceManager.has_service(name="asd-manager", client=client):
                remove_asd_manager = Interactive.ask_yesno(
                    message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                )

    if remove_asd_manager is True or storage_router_to_remove_online is False:
        # Give plugins a chance to veto the ASD removal
        for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
            validation_output = function(storage_router_to_remove.ip)
            if validation_output["confirm"] is True:
                if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                    remove_asd_manager = False
                    break

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="Starting removal of node {0} - {1}".format(
                storage_router_to_remove.name, storage_router_to_remove.ip
            ),
        )
        if storage_router_to_remove_online is False:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="  Marking all Storage Drivers served by Storage Router {0} as offline".format(
                    storage_router_to_remove.ip
                ),
            )
            StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="  Removing vPools from node".format(storage_router_to_remove.ip),
        )
        storage_routers_offline_guids = [
            sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
        ]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="    Removing vPool {0} from node".format(storage_driver.vpool.name),
            )
            StorageRouterController.remove_storagedriver(
                storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
            )

        # Demote if MASTER
        if storage_router_to_remove.node_type == "MASTER":
            NodeTypeController.demote_node(
                cluster_ip=storage_router_to_remove.ip,
                master_ip=master_ip,
                ip_client_map=ip_client_map,
                unique_id=storage_router_to_remove.machine_id,
                unconfigure_memcached=internal_memcached,
                unconfigure_rabbitmq=internal_rabbit_mq,
                offline_nodes=storage_routers_offline,
            )

        # Stop / remove services
        Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
        config_store = Configuration.get_store()
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            NodeRemovalController.remove_services(
                client=client,
                node_type=storage_router_to_remove.node_type.lower(),
                logger=NodeRemovalController._logger,
            )
            service = "watcher-config"
            if ServiceManager.has_service(service, client=client):
                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                ServiceManager.stop_service(service, client=client)
                ServiceManager.remove_service(service, client=client)

            if config_store == "etcd":
                from ovs.extensions.db.etcd.installer import EtcdInstaller

                # Only tear down the cluster itself when it is internally managed
                if Configuration.get(key="/ovs/framework/external_config") is None:
                    Toolbox.log(logger=NodeRemovalController._logger, messages="      Removing Etcd cluster")
                    try:
                        EtcdInstaller.stop("config", client)
                        EtcdInstaller.remove("config", client)
                    except Exception as ex:
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=["\nFailed to unconfigure Etcd", ex],
                            loglevel="exception",
                        )

                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                EtcdInstaller.remove_proxy("config", client.ip)

        Toolbox.run_hooks(
            component="noderemoval",
            sub_component="remove",
            logger=NodeRemovalController._logger,
            cluster_ip=storage_router_to_remove.ip,
            complete_removal=remove_asd_manager,
        )

        # Clean up model
        Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map,
            offline_node_ips=[node.ip for node in storage_routers_offline],
            logger=NodeRemovalController._logger,
        )

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if config_store == "arakoon":
                client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
                client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=["An unexpected error occurred:", str(exception)],
            boxed=True,
            loglevel="exception",
        )
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
            boxed=True,
            loglevel="error",
        )
        sys.exit(1)

    if remove_asd_manager is True:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system("asd-manager remove --force-yes")
    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
def migrate(previous_version, master_ips=None, extra_ips=None):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
        - 1 > 2
        - 2 > 3
    :param previous_version: The previous version from which to start the migration.
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    :return: The version this migrator has brought the environment to
    """
    working_version = previous_version

    # Version 1 introduced:
    # - Flexible SSD layout
    if working_version < 1:
        try:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
        except Exception:  # Fixed: was a bare 'except:', which also swallows SystemExit/KeyboardInterrupt
            logger.exception('Error migrating to version 1')
        working_version = 1

    # Version 2 introduced:
    # - Registration
    if working_version < 2:
        try:
            import time
            from ovs.extensions.generic.configuration import Configuration
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())
        except Exception:
            logger.exception('Error migrating to version 2')
        working_version = 2

    # Version 3 introduced:
    # - New arakoon clients
    if working_version < 3:
        try:
            # Reload the module first so the freshly installed version is used instead of a cached one
            from ovs.extensions.db.arakoon import ArakoonInstaller
            reload(ArakoonInstaller)
            from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.generic.configuration import Configuration
            if master_ips is not None:
                for ip in master_ips:
                    client = SSHClient(ip)
                    if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                        for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            try:
                                ArakoonInstaller.deploy_cluster(cluster_name, ip)
                            except Exception:  # Best effort per cluster; one failure must not abort the others
                                pass
            if Configuration.exists('ovs.core.storage.persistent'):
                Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
        except Exception:
            logger.exception('Error migrating to version 3')
        working_version = 3

    # Version 4 introduced:
    # - Etcd
    if working_version < 4:
        try:
            import os
            import json
            from ConfigParser import RawConfigParser
            from ovs.extensions.db.etcd import installer
            reload(installer)
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            from ovs.extensions.db.etcd.configuration import EtcdConfiguration
            from ovs.extensions.generic.system import System
            host_id = System.get_my_machine_id()
            etcd_migrate = False
            if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                # A local etcd 'config' cluster already exists, only the data migration remains
                etcd_migrate = True
            else:
                if master_ips is not None and extra_ips is not None:
                    # Find an existing 'config' cluster on any known node
                    cluster_ip = None
                    for ip in master_ips + extra_ips:
                        if EtcdInstaller.has_cluster(ip, 'config'):
                            cluster_ip = ip
                            break
                    node_ip = None
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            node_ip = config['grid']['ip']
                    if node_ip is not None:
                        if cluster_ip is None:
                            # No cluster anywhere yet: bootstrap one on this node
                            EtcdInstaller.create_cluster('config', node_ip)
                            EtcdConfiguration.initialize()
                            EtcdConfiguration.initialize_host(host_id)
                        else:
                            # Join the existing cluster
                            EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                            EtcdConfiguration.initialize_host(host_id)
                        etcd_migrate = True
            if etcd_migrate is True:
                # Migrating configuration files
                path = '/opt/OpenvStorage/config/ovs.json'
                if os.path.exists(path):
                    with open(path) as config_file:
                        config = json.load(config_file)
                        EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                        if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                            EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                        else:
                            # Keep the oldest known install time across the cluster
                            EtcdConfiguration.set('/ovs/framework/install_time', min(EtcdConfiguration.get('/ovs/framework/install_time'), config['core']['install_time']))
                        EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                        EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                        EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                        EtcdConfiguration.set('/ovs/framework/paths', {'cfgdir': config['core']['cfgdir'],
                                                                       'basedir': config['core']['basedir'],
                                                                       'ovsdb': config['core']['ovsdb']})
                        EtcdConfiguration.set('/ovs/framework/support', {'enablesupport': config['support']['enablesupport'],
                                                                         'enabled': config['support']['enabled'],
                                                                         'interval': config['support']['interval']})
                        EtcdConfiguration.set('/ovs/framework/storagedriver', {'mds_safety': config['storagedriver']['mds']['safety'],
                                                                               'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                                                               'mds_maxload': config['storagedriver']['mds']['maxload']})
                        EtcdConfiguration.set('/ovs/framework/webapps', {'html_endpoint': config['webapps']['html_endpoint'],
                                                                         'oauth2': config['webapps']['oauth2']})
                        EtcdConfiguration.set('/ovs/framework/messagequeue', {'endpoints': [],
                                                                              'protocol': config['core']['broker']['protocol'],
                                                                              'user': config['core']['broker']['login'],
                                                                              'port': config['core']['broker']['port'],
                                                                              'password': config['core']['broker']['password'],
                                                                              'queues': config['core']['broker']['queues']})
                        # host_key keeps an inner '{0}' placeholder for the per-host sub-paths below
                        host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                        EtcdConfiguration.set(host_key.format('/storagedriver'), {'rsp': config['storagedriver']['rsp'],
                                                                                  'vmware_mode': config['storagedriver']['vmware_mode']})
                        EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                        EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                        EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                        EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                        EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])
                path = '{0}/memcacheclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip()
                             for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/memcache|endpoints', nodes)
                    os.remove(path)
                path = '{0}/rabbitmqclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip()
                             for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/messagequeue|endpoints', nodes)
                    os.remove(path)
                # Migrate arakoon configuration files
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                from ovs.extensions.generic.sshclient import SSHClient
                if master_ips is not None:
                    config_dir = '/opt/OpenvStorage/config/arakoon/'
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(config_dir):
                            for cluster_name in client.dir_list(config_dir):
                                try:
                                    with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                        EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name),
                                                              config_file.read(),
                                                              raw=True)
                                    ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except Exception:
                                    logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                            client.dir_delete(config_dir)
        except Exception:
            logger.exception('Error migrating to version 4')
        working_version = 4

    return working_version
def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
    """
    Demotes a given node from MASTER to EXTRA
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of a remaining master node
    :param ip_client_map: Mapping of IP to an SSHClient for every reachable node
    :param unique_id: Machine id of the node to demote
    :param unconfigure_memcached: Whether memcached should be unconfigured on the node
    :param unconfigure_rabbitmq: Whether RabbitMQ should be unconfigured on the node
    :param offline_nodes: StorageRouters which are offline (defaults to none)
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
    if offline_nodes is None:
        offline_nodes = []

    if unconfigure_memcached is True and len(offline_nodes) == 0:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'EXTRA'
    storagerouter.save()

    offline_node_ips = [node.ip for node in offline_nodes]

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                        remaining_node_ips=master_node_ips,
                                        cluster_name=arakoon_cluster_name,
                                        offline_nodes=offline_node_ips)

    try:
        # Only leave the configuration cluster when it is internally managed
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                remaining_node_ips=master_node_ips,
                                                cluster_name='config',
                                                offline_nodes=offline_node_ips,
                                                filesystem=True)
            else:
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    try:
        # Remove the demoted node's endpoints from the shared service configuration
        if unconfigure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 11211)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if unconfigure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 5672)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        remaining_nodes = ip_client_map.keys()[:]
        if cluster_ip in remaining_nodes:
            remaining_nodes.remove(cluster_ip)

        # Force the factories to reconnect now the arakoon topology changed
        PersistentFactory.store = None
        VolatileFactory.store = None

        for service in storagerouter.services:
            if service.name == 'arakoon-ovsdb':
                service.delete()

    target_client = None
    if storagerouter in offline_nodes:
        # Node itself is unreachable: clean up what we can from a remaining master
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
            client = ip_client_map[master_ip]
            try:
                client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
    else:
        target_client = ip_client_map[cluster_ip]
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
            try:
                if ServiceManager.has_service('rabbitmq-server', client=target_client):
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                    target_client.run(['rabbitmq-server', '-detached'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop_app'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'reset'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop'])
                    time.sleep(5)
                    target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
        services = ['memcached', 'rabbitmq-server']
        if unconfigure_rabbitmq is False:
            services.remove('rabbitmq-server')
        if unconfigure_memcached is False:
            services.remove('memcached')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                except Exception as ex:
                    # Fixed: the message template was missing its '{0}' placeholder, dropping the service name
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service {0}'.format(service), ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
        services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    ServiceManager.remove_service(service, client=target_client)
                except Exception as ex:
                    # Fixed: the message template was missing its '{0}' placeholder, dropping the service name
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service {0}'.format(service), ex], loglevel='exception')

        if ServiceManager.has_service('workers', client=target_client):
            ServiceManager.add_service(name='workers',
                                       client=target_client,
                                       params={'WORKER_QUEUE': '{0}'.format(unique_id)})
    try:
        NodeTypeController._configure_amqp_to_volumedriver()
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if Toolbox.run_hooks(component='nodetype',
                         sub_component='demote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip,
                         offline_node_ips=offline_node_ips):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if storagerouter not in offline_nodes:
        target_client = ip_client_map[cluster_ip]
        node_name, _ = target_client.get_hostname()
        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

    if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_write('/tmp/ovs_rollback', 'rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node from EXTRA to MASTER.

    Joins the node into the configuration store (Arakoon or Etcd), optionally
    into the Arakoon OVS DB cluster, configures memcached/RabbitMQ, registers
    the relevant services and restarts the framework. Statement order matters:
    each step assumes the previous one completed on the target node.

    :param cluster_ip: IP of the node being promoted (must be a key of ip_client_map)
    :param master_ip: IP of an existing master node to join/copy state from
    :param ip_client_map: Mapping of IP -> SSHClient for all participating nodes
    :param unique_id: Machine id of the StorageRouter being promoted
    :param configure_memcached: True to configure memcached on the new master
    :param configure_rabbitmq: True to configure RabbitMQ on the new master
    :raises RuntimeError: When memcache nodes are unreachable or no other master node exists
    """
    # Local imports: DAL objects are imported lazily (consistent with the
    # surrounding codebase) to avoid import-time side effects.
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service
    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    # All memcache nodes must be reachable before promotion; otherwise the
    # shared cache would be inconsistent across masters.
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')
    target_client = ip_client_map[cluster_ip]  # client for the node being promoted
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]  # client for the existing master

    # Flip the node type in the model first, so the rest of the flow sees it as MASTER
    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    # Join the configuration store, but only when it is managed internally
    # (external_config set means an externally managed store — nothing to join).
    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        config_store = Configuration.get_store()
        if config_store == 'arakoon':
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
            metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                       new_ip=cluster_ip,
                                                       cluster_name='config',
                                                       base_dir=Configuration.get('/ovs/framework/paths|ovsdb'),
                                                       ports=[26400, 26401],
                                                       filesystem=True)
            ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                 current_ips=metadata['ips'],
                                                 new_ip=cluster_ip,
                                                 filesystem=True)
            ServiceManager.register_service(node_name=machine_id,
                                            service_metadata=metadata['service_metadata'])
        else:
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
            EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

    # Find other (arakoon) master nodes; the node being promoted is excluded
    # so the remaining list represents the pre-existing masters.
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    # When the OVS DB Arakoon is internally managed, extend it onto this node
    # and remember the assigned ports for service registration below.
    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                 new_ip=cluster_ip,
                                                 cluster_name=arakoon_cluster_name,
                                                 base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=result['ips'],
                                             new_ip=cluster_ip,
                                             filesystem=False)
        arakoon_ports = [result['client_port'], result['messaging_port']]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

    # Publish this node's memcache/rabbitmq endpoints so other nodes can reach it
    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
        Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    if configure_rabbitmq is True:
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
        Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

    if arakoon_metadata['internal'] is True:
        # Restart the pre-existing master nodes so they pick up the new member
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=master_node_ips,
                                             new_ip=cluster_ip,
                                             filesystem=False)
        # Drop cached store clients so they reconnect against the extended cluster
        PersistentFactory.store = None
        VolatileFactory.store = None

        # Register the arakoon-ovsdb service in the model unless an entry
        # already exists (external service, or one already on this node)
        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)

        # Copy rabbitmq cookie: cluster members must share the same Erlang
        # cookie before 'join_cluster' can succeed.
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'
        Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        # NOTE(review): mode=400 is a decimal literal, not octal 0o400 — verify
        # that file_chmod interprets this as the chmod string '400' (r--------)
        target_client.file_chmod(rabbitmq_cookie_file, mode=400)
        # Join this broker into the master's RabbitMQ cluster. The sleeps give
        # the Erlang VM time to settle between state transitions.
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues
        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController._configure_amqp_to_volumedriver()

    # Start master services that exist on the node. arakoon-ovsdb is skipped
    # when internally managed because restart_cluster_add already started it.
    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    for service in services:
        if ServiceManager.has_service(service, client=target_client):
            Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    # Plugin hooks may change configuration; when any hook ran, restart again
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    # Advertise the new role (avahi, when available) and persist it
    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    # Promotion succeeded: clear any pending rollback marker
    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')