def add_services(client, node_type, logger):
    """
    Add the services required by the OVS cluster
    :param client: Client on which to add the services
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param node_type: Type of node ('master' or 'extra')
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :return: None
    """
    Toolbox.log(logger=logger, messages='Adding services')
    services = {}
    worker_queue = System.get_my_machine_id(client=client)
    if node_type == 'master':
        worker_queue += ',ovs_masters'
        services.update({'memcached': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                         'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                         'scheduled-tasks': {},
                         'webapp-api': {},
                         'volumerouter-consumer': {}})
    services.update({'workers': {'WORKER_QUEUE': worker_queue},
                     'watcher-framework': {}})

    for service_name, params in services.iteritems():
        if not ServiceManager.has_service(service_name, client):
            Toolbox.log(logger=logger, messages='Adding service {0}'.format(service_name))
            ServiceManager.add_service(name=service_name, params=params, client=client)
def restart_framework_and_memcache_services(clients, logger, offline_node_ips=None):
    """
    Restart framework and Memcached services
    :param clients: Clients on which to restart these services
    :type clients: dict
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :param offline_node_ips: IP addresses of offline nodes in the cluster
    :type offline_node_ips: list
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    master_ips = [sr.ip for sr in StorageRouterList.get_masters()]
    slave_ips = [sr.ip for sr in StorageRouterList.get_slaves()]
    if offline_node_ips is None:
        offline_node_ips = []

    memcached = 'memcached'
    watcher = 'watcher-framework'
    support_agent = 'support-agent'
    for ip in master_ips + slave_ips:
        if ip not in offline_node_ips:
            if ServiceManager.has_service(watcher, clients[ip]):
                Toolbox.change_service_state(clients[ip], watcher, 'stop', logger)
    for ip in master_ips:
        if ip not in offline_node_ips:
            Toolbox.change_service_state(clients[ip], memcached, 'restart', logger)
    for ip in master_ips + slave_ips:
        if ip not in offline_node_ips:
            if ServiceManager.has_service(watcher, clients[ip]):
                Toolbox.change_service_state(clients[ip], watcher, 'start', logger)
            if ServiceManager.has_service(support_agent, clients[ip]):
                Toolbox.change_service_state(clients[ip], support_agent, 'restart', logger)
    VolatileFactory.store = None
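# Usage sketch for restart_framework_and_memcache_services() above. The import paths follow
# the type hints in the docstrings (ovs.extensions.generic.sshclient.SSHClient,
# ovs.log.log_handler.LogHandler); the LogHandler.get() call and the IP addresses are
# illustrative assumptions, not taken from the function itself.
def _example_restart_framework_services(node_ips, offline_node_ips):
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs.log.log_handler import LogHandler

    logger = LogHandler.get('lib', name='example')  # Assumed logger factory; adapt to the real API
    clients = dict((ip, SSHClient(ip, username='root')) for ip in node_ips
                   if ip not in offline_node_ips)
    restart_framework_and_memcache_services(clients=clients, logger=logger,
                                            offline_node_ips=offline_node_ips)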
def remove_services(client, node_type, logger):
    """
    Remove all services managed by OVS
    :param client: Client on which to remove the services
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param node_type: Type of node, can be 'master' or 'extra'
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :return: None
    """
    Toolbox.log(logger=logger, messages="Removing services")
    stop_only = ["rabbitmq-server", "memcached"]
    services = ["workers", "support-agent", "watcher-framework"]
    if node_type == "master":
        services += ["scheduled-tasks", "webapp-api", "volumerouter-consumer"]
        if Toolbox.is_service_internally_managed(service="rabbitmq") is True:
            services.append("rabbitmq-server")
        if Toolbox.is_service_internally_managed(service="memcached") is True:
            services.append("memcached")

    for service in services:
        if ServiceManager.has_service(service, client=client):
            Toolbox.log(
                logger=logger,
                messages="{0} service {1}".format("Removing" if service not in stop_only else "Stopping", service),
            )
            ServiceManager.stop_service(service, client=client)
            if service not in stop_only:
                ServiceManager.remove_service(service, client=client)
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node
    :param offline_node_ips: IPs of nodes which are offline
    """
    if offline_node_ips is None:
        offline_node_ips = []
    client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
    servicetype = ServiceTypeList.get_by_name('Arakoon')
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv':
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            elif service.storagerouter.ip not in offline_node_ips:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        print '* Shrink StorageDriver cluster'
        ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv', offline_node_ips)
        if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
            ServiceManager.stop_service(current_service.name, client=client)
            ServiceManager.remove_service(current_service.name, client=client)
        ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
        current_service.delete()
    StorageDriverController._configure_arakoon_to_volumedriver(offline_node_ips)
def _deploy(config, offline_nodes=None):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    ArakoonInstaller._logger.debug('Deploying cluster {0}'.format(config.cluster_id))
    if offline_nodes is None:
        offline_nodes = []
    for node in config.nodes:
        if node.ip in offline_nodes:
            continue
        ArakoonInstaller._logger.debug('  Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
        root_client = SSHClient(node.ip, username='******')

        # Distributes a configuration file to all its nodes
        config.write_config()

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = [node.log_dir, node.tlog_dir, node.home]
        if not root_client.dir_exists(abs_paths):
            root_client.dir_create(abs_paths)
        root_client.dir_chmod(abs_paths, 0755, recursive=True)
        root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        # Creates services for/on all nodes in the config
        base_name = 'ovs-arakoon'
        target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
        ServiceManager.add_service(base_name, root_client,
                                   params={'CLUSTER': config.cluster_id,
                                           'NODE_ID': node.name,
                                           'CONFIG_PATH': ArakoonInstaller.ETCD_CONFIG_PATH.format(config.cluster_id)},
                                   target_name=target_name)
        ArakoonInstaller._logger.debug('  Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))
def stop(cluster_name, client):
    """
    Stops an arakoon service
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
            ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
def on_demote(cluster_ip, master_ip):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node
    """
    client = SSHClient(cluster_ip, username='******')
    servicetype = ServiceTypeList.get_by_name('Arakoon')
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv':
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            else:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        print '* Shrink StorageDriver cluster'
        ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv')
        if ServiceManager.has_service(current_service.name, client=client) is True:
            ServiceManager.stop_service(current_service.name, client=client)
            ServiceManager.remove_service(current_service.name, client=client)
        ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
        current_service.delete()
        for storagerouter in StorageRouterList.get_storagerouters():
            ArakoonInstaller.deploy_to_slave(master_ip, storagerouter.ip, 'voldrv')
    StorageDriverController._configure_arakoon_to_volumedriver()
def _deploy(config):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    logger.debug("Deploying cluster {0}".format(config.cluster_id))
    for node in config.nodes:
        logger.debug("  Deploying cluster {0} on {1}".format(config.cluster_id, node.ip))
        ovs_client = SSHClient(node.ip)
        root_client = SSHClient(node.ip, username="******")

        # Distributes a configuration file to all its nodes
        config.write_config(ovs_client)

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = [node.log_dir, node.tlog_dir, node.home]
        root_client.dir_create(abs_paths)
        root_client.dir_chmod(abs_paths, 0755, recursive=True)
        root_client.dir_chown(abs_paths, "ovs", "ovs", recursive=True)

        # Creates services for/on all nodes in the config
        base_name = "ovs-arakoon"
        target_name = "ovs-arakoon-{0}".format(config.cluster_id)
        ServiceManager.prepare_template(base_name, target_name, ovs_client)
        ServiceManager.add_service(target_name, root_client, params={"CLUSTER": config.cluster_id})
        logger.debug("  Deploying cluster {0} on {1} completed".format(config.cluster_id, node.ip))
def _setup_proxy(initial_cluster, slave_client, cluster_name, force=False):
    """
    Sets up an etcd proxy service for the given cluster on a slave node
    """
    base_name = 'ovs-etcd-proxy'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    if force is False and ServiceManager.has_service(target_name, slave_client) and \
            ServiceManager.get_service_status(target_name, slave_client) is True:
        logger.info('Service {0} already configured and running'.format(target_name))
        return
    EtcdInstaller.stop(cluster_name, slave_client)

    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    slave_client.dir_delete(abs_paths)
    slave_client.dir_create(data_dir)
    slave_client.dir_chmod(data_dir, 0755, recursive=True)
    slave_client.dir_chown(data_dir, 'ovs', 'ovs', recursive=True)

    ServiceManager.add_service(base_name, slave_client,
                               params={'CLUSTER': cluster_name,
                                       'DATA_DIR': data_dir,
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': initial_cluster},
                               target_name=target_name)
    EtcdInstaller.start(cluster_name, slave_client)
    EtcdInstaller.wait_for_cluster(cluster_name, slave_client)
def delete_cluster(cluster_name, ip, filesystem=False):
    """
    Deletes a complete cluster
    :param cluster_name: Name of the cluster to remove
    :type cluster_name: str
    :param ip: IP address of the last node of a cluster
    :type ip: str
    :param filesystem: Indicates whether the configuration should be on the filesystem or in a configuration cluster
    :type filesystem: bool
    :return: None
    """
    ArakoonInstaller._logger.debug('Deleting cluster {0} on {1}'.format(cluster_name, ip))
    config = ArakoonClusterConfig(cluster_name, filesystem)
    config.load_config(ip)
    cluster_type = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=config.cluster_id,
                                                                         filesystem=filesystem,
                                                                         ip=ip)['cluster_type']
    service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=config.cluster_id)
    for node in config.nodes:
        try:
            ServiceManager.unregister_service(service_name=service_name, node_name=node.name)
        except:
            ArakoonInstaller._logger.exception('Un-registering service {0} on {1} failed'.format(service_name, ip))

    # Cleans up a complete cluster (remove services, directories and configuration files)
    for node in config.nodes:
        ArakoonInstaller._destroy_node(config, node, delay_unregistration=cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG)
    config.delete_config(ip)
    ArakoonInstaller._logger.debug('Deleting cluster {0} on {1} completed'.format(cluster_name, ip))
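# Usage sketch for delete_cluster() above, assuming it is exposed as a static method on
# ArakoonInstaller. The cluster name and IP address are illustrative; with filesystem=False
# the cluster configuration is expected to live in the configuration store rather than on disk.
def _example_delete_arakoon_cluster():
    ArakoonInstaller.delete_cluster(cluster_name='ovsdb', ip='10.100.1.1', filesystem=False)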
def start(cluster_name, client):
    """
    Starts an arakoon cluster
    :param client: Client on which to start the service
    :param cluster_name: The name of the cluster service to start
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.start_service('arakoon-{0}'.format(cluster_name), client=client)
def start_service(name, client):
    """
    Start a service
    :param name: Name of the service
    :param client: SSHClient object
    :return: None
    """
    ServiceManager.start_service(name, client)
def stop(cluster_name, client):
    """
    Stops an etcd service
    :param client: Client on which to stop the service
    :param cluster_name: The name of the cluster service to stop
    """
    if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.stop_service('etcd-{0}'.format(cluster_name), client=client)
def restart_required_services():
    """
    Restart the services required by the ASD manager
    :return: None
    """
    # 'root_client' is assumed to be available in the enclosing module scope
    ServiceManager.restart_service('avahi-daemon', root_client)
def remove(cluster_name, client):
    """
    Removes an etcd service
    :param client: Client on which to remove the service
    :param cluster_name: The name of the cluster service to remove
    """
    if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.remove_service('etcd-{0}'.format(cluster_name), client=client)
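# Teardown sketch combining the etcd stop() and remove() helpers above. Both helpers are
# guarded by ServiceManager.has_service(), so calling them on a node that never ran the
# cluster service is a no-op. The SSHClient import path comes from the docstrings earlier
# in this file; EtcdInstaller as the owning class and the cluster name are assumptions.
def _example_remove_etcd_service(node_ip, cluster_name='config'):
    from ovs.extensions.generic.sshclient import SSHClient

    client = SSHClient(node_ip, username='root')
    EtcdInstaller.stop(cluster_name, client)
    EtcdInstaller.remove(cluster_name, client)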
def extend_cluster(master_ip, new_ip, cluster_name):
    """
    Extends a cluster to a given new node
    :param cluster_name: Name of the cluster to be extended
    :param new_ip: IP address of the node to be added
    :param master_ip: IP of one of the already existing nodes
    """
    logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

    client = SSHClient(master_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, client):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

    cluster_members = client.run('etcdctl member list').splitlines()
    for cluster_member in cluster_members:
        if EtcdInstaller.SERVER_URL.format(new_ip) in cluster_member:
            logger.info('Node {0} already member of etcd cluster'.format(new_ip))
            return

    current_cluster = []
    for item in client.run('etcdctl member list').splitlines():
        info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
        current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

    client = SSHClient(new_ip, username='******')
    node_name = System.get_my_machine_id(client)
    current_cluster.append('{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))

    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, client)  # Stop a possible proxy service
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': ','.join(current_cluster),
                                       'INITIAL_STATE': 'existing',
                                       'INITIAL_PEERS': ''},
                               target_name=target_name)

    master_client = SSHClient(master_ip, username='******')
    master_client.run('etcdctl member add {0} {1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))

    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client)

    logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
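# Usage sketch for extend_cluster() above: growing the 'config' etcd cluster from an existing
# member to a new node. The IP addresses and cluster name are illustrative; the call is
# effectively idempotent because the current member list is checked before the node is added.
def _example_extend_etcd_cluster():
    EtcdInstaller.extend_cluster(master_ip='10.100.1.1', new_ip='10.100.1.4', cluster_name='config')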
def start(cluster_name, client):
    """
    Starts an etcd cluster
    :param client: Client on which to start the service
    :param cluster_name: The name of the cluster service to start
    """
    if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True and \
            ServiceManager.get_service_status('etcd-{0}'.format(cluster_name), client=client) is False:
        ServiceManager.start_service('etcd-{0}'.format(cluster_name), client=client)
def stop(cluster_name, client):
    """
    Stops an arakoon service
    """
    if (
        ServiceManager.has_service("arakoon-{0}".format(cluster_name), client=client) is True
        and ServiceManager.get_service_status("arakoon-{0}".format(cluster_name), client=client) is True
    ):
        ServiceManager.stop_service("arakoon-{0}".format(cluster_name), client=client)
def is_running(cluster_name, client):
    """
    Checks if the arakoon service is running
    :param cluster_name: The name of the cluster service to check
    :param client: Client on which to check the service
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client):
        return ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client)
    return False
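# Sketch of a wait-until-running helper built on top of is_running() above. The helper itself
# is not part of the framework; the timeout and poll interval are arbitrary example values.
def _example_wait_until_arakoon_running(cluster_name, client, timeout=60, poll_interval=2):
    import time

    end_time = time.time() + timeout
    while time.time() < end_time:
        if is_running(cluster_name, client):
            return True
        time.sleep(poll_interval)
    return False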
def stop(cluster_name, client):
    """
    Stops an arakoon service
    :param client: Client on which to stop the service
    :param cluster_name: The name of the cluster service to stop
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
            ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
def tearDown(self):
    """
    Clean up the unittest
    """
    # Cleaning storage
    self.volatile.clean()
    self.persistent.clean()

    Upstart.clean()
    ServiceManager.clean()
    StorageRouterClient.clean()
def setUp(self):
    """
    (Re)Sets the stores on every test
    """
    # Cleaning storage
    self.volatile.clean()
    self.persistent.clean()

    Upstart.clean()
    ServiceManager.clean()
    StorageRouterClient.clean()
def _roll_out_dtl_services(vpool, storagerouters):
    """
    Deploy and start the DTL service on all storagerouters
    :param vpool: vPool to deploy and start a DTL service for
    :param storagerouters: StorageRouters to deploy and start a DTL service on
    :return: None
    """
    service_name = 'dtl_{0}'.format(vpool.name)
    for sr in storagerouters.values():
        client = SSHClient(sr, 'root')
        ServiceManager.add_service(name=service_name, client=client)
        ServiceManager.start_service(name=service_name, client=client)
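# Usage sketch for _roll_out_dtl_services() above. The helper iterates over .values(), so any
# mapping of StorageRouter objects works; keying the dict by guid here is an assumption made
# for illustration. The StorageRouterList import path is the one used elsewhere in this file.
def _example_roll_out_dtl(vpool):
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    storagerouters = dict((sr.guid, sr) for sr in StorageRouterList.get_storagerouters())
    _roll_out_dtl_services(vpool=vpool, storagerouters=storagerouters)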
def start(cluster_name, client):
    """
    Starts an etcd cluster
    :param cluster_name: The name of the cluster service to start
    :type cluster_name: str
    :param client: Client on which to start the service
    :type client: SSHClient
    :return: None
    """
    if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.start_service('etcd-{0}'.format(cluster_name), client=client)
def stop(cluster_name, client):
    """
    Stops an arakoon service
    :param cluster_name: The name of the cluster service to stop
    :type cluster_name: str
    :param client: Client on which to stop the service
    :type client: SSHClient
    :return: None
    """
    service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
    if ServiceManager.has_service(name=service_name, client=client) is True:
        ServiceManager.stop_service(name=service_name, client=client)
def setUpClass(cls):
    """
    Sets up the unittest, mocking a certain set of 3rd party libraries and extensions.
    This makes sure the unittests can be executed without those libraries installed
    """
    cls.persistent = PersistentFactory.get_client()
    cls.persistent.clean()
    cls.volatile = VolatileFactory.get_client()
    cls.volatile.clean()

    Upstart.clean()
    ServiceManager.clean()
    StorageRouterClient.clean()
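# Minimal test-case skeleton showing how the setUpClass()/setUp()/tearDown() hooks above fit
# together. PersistentFactory, VolatileFactory, Upstart, ServiceManager and StorageRouterClient
# are assumed to be the same mocked test doubles used by the hooks; only the class layout is new.
import unittest

class _ExampleTestCase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.persistent = PersistentFactory.get_client()
        cls.persistent.clean()
        cls.volatile = VolatileFactory.get_client()
        cls.volatile.clean()

    def setUp(self):
        # tearDown() would perform the same cleaning, as in the hooks above
        self.volatile.clean()
        self.persistent.clean()
        Upstart.clean()
        ServiceManager.clean()
        StorageRouterClient.clean()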
def _restart_openstack_services(self):
    """
    Restart the OpenStack services
    """
    services = OSManager.get_openstack_services()
    for service_name in services:
        if ServiceManager.has_service(service_name, self.client):
            try:
                ServiceManager.restart_service(service_name, self.client)
            except SystemExit as sex:
                logger.debug('Failed to restart service {0}. {1}'.format(service_name, sex))
            time.sleep(3)
    return self._is_cinder_running()
def remove(cluster_name, client):
    """
    Removes an arakoon service
    :param cluster_name: The name of the cluster service to remove
    :type cluster_name: str
    :param client: Client on which to remove the service
    :type client: SSHClient
    :return: None
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.remove_service('arakoon-{0}'.format(cluster_name), client=client)
def is_running(cluster_name, client):
    """
    Checks if the arakoon service is running
    :param cluster_name: The name of the cluster service to check
    :type cluster_name: str
    :param client: Client on which to check the service
    :type client: SSHClient
    :return: True if the service is running, False otherwise
    :rtype: bool
    """
    service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
    if ServiceManager.has_service(name=service_name, client=client):
        return ServiceManager.get_service_status(name=service_name, client=client)[0]
    return False
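# Note on the two is_running() variants above: in this variant ServiceManager.get_service_status()
# appears to return a tuple whose first element is the boolean running state (hence the [0]),
# while the older variant returns the boolean directly. A small compatibility shim, as a sketch:
def _example_service_is_running(service_name, client):
    status = ServiceManager.get_service_status(name=service_name, client=client)
    return status[0] if isinstance(status, tuple) else status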
def stop(cluster_name, client):
    """
    Stops an arakoon service
    :param cluster_name: The name of the cluster service to stop
    :type cluster_name: str
    :param client: Client on which to stop the service
    :type client: SSHClient
    :return: None
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
def _is_openstack(self):
    """
    Checks whether an OpenStack installation is present by looking for the Cinder service
    """
    cinder_service = OSManager.get_openstack_cinder_service_name()
    return ServiceManager.has_service(cinder_service, self.client)
def extend_cluster(master_ip, new_ip, cluster_name): """ Extends a cluster to a given new node :param base_dir: Base directory that will hold the data :param cluster_name: Name of the cluster to be extended :param new_ip: IP address of the node to be added :param master_ip: IP of one of the already existing nodes """ logger.debug('Extending cluster "{0}" from {1} to {2}'.format( cluster_name, master_ip, new_ip)) client = SSHClient(master_ip, username='******') if not EtcdInstaller._is_healty(cluster_name, client): raise RuntimeError( 'Cluster "{0}" unhealthy, aborting extend'.format( cluster_name)) current_cluster = [] for item in client.run('etcdctl member list').splitlines(): info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict() current_cluster.append('{0}={1}'.format(info['name'], info['peer'])) client = SSHClient(new_ip, username='******') node_name = System.get_my_machine_id(client) current_cluster.append('{0}={1}'.format( node_name, EtcdInstaller.SERVER_URL.format(new_ip))) data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name) wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name) abs_paths = [data_dir, wal_dir] client.dir_delete(abs_paths) client.dir_create(abs_paths) client.dir_chmod(abs_paths, 0755, recursive=True) client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True) base_name = 'ovs-etcd' target_name = 'ovs-etcd-{0}'.format(cluster_name) EtcdInstaller.stop(cluster_name, client) # Stop a possible proxy service ServiceManager.add_service( base_name, client, params={ 'CLUSTER': cluster_name, 'NODE_ID': node_name, 'DATA_DIR': data_dir, 'WAL_DIR': wal_dir, 'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip), 'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip), 'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'), 'INITIAL_CLUSTER': ','.join(current_cluster), 'INITIAL_STATE': 'existing', 'INITIAL_PEERS': '' }, target_name=target_name) master_client = SSHClient(master_ip, username='******') master_client.run('etcdctl member add {0} {1}'.format( node_name, EtcdInstaller.SERVER_URL.format(new_ip))) EtcdInstaller.start(cluster_name, client) EtcdInstaller.wait_for_cluster(cluster_name, client) logger.debug( 'Extending cluster "{0}" from {1} to {2} completed'.format( cluster_name, master_ip, new_ip))
def _restart_processes(self): """ Restart the cinder process that uses the OVS volume driver - also restarts nova api and compute services """ def stop_screen_process(process_name): out = self.client.run( '''su stack -c 'screen -S {0} -p {1} -Q select 1>/dev/null; echo $?' ''' .format(screen_name, process_name)) process_screen_exists = out == '0' if process_screen_exists: self.client.run( '''su stack -c 'screen -S {0} -p {1} -X stuff \n' '''. format(screen_name, process_name)) self.client.run( '''su stack -c 'screen -S {0} -p {1} -X kill' '''.format( screen_name, process_name)) return process_screen_exists def start_screen_process(process_name, commands): logfile = '{0}/{1}.log.{2}'.format( logdir, process_name, datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d-%H%M%S')) self._logger.debug( self.client.run( '''su stack -c 'touch {0}' '''.format(logfile))) self._logger.debug( self.client.run( '''su stack -c 'screen -S {0} -X screen -t {1}' '''.format( screen_name, process_name))) self._logger.debug( self.client.run( '''su stack -c 'screen -S {0} -p {1} -X logfile {2}' '''. format(screen_name, process_name, logfile))) self._logger.debug( self.client.run( '''su stack -c 'screen -S {0} -p {1} -X log on' '''.format( screen_name, process_name))) time.sleep(1) self._logger.debug( self.client.run('rm {0}/{1}.log || true'.format( logdir, process_name))) self._logger.debug( self.client.run('ln -sf {0} {1}/{2}.log'.format( logfile, logdir, process_name))) for command in commands: cmd = '''su stack -c 'screen -S {0} -p {1} -X stuff "{2}\012"' '''.format( screen_name, process_name, command) self._logger.debug(cmd) self._logger.debug(self.client.run(cmd)) logdir = '/opt/stack/logs' screen_name = 'stack' if self._is_devstack is True: try: c_vol_screen_exists = stop_screen_process('c-vol') n_cpu_screen_exists = stop_screen_process('n-cpu') n_api_screen_exists = stop_screen_process('n-api') c_api_screen_exists = stop_screen_process('c-api') self.client.run('''su stack -c 'mkdir -p /opt/stack/logs' ''') if c_vol_screen_exists: start_screen_process('c-vol', [ "export PYTHONPATH=\"${PYTHONPATH}:/opt/OpenvStorage\" ", "newgrp ovs", "newgrp stack", "umask 0002", "/usr/local/bin/cinder-volume --config-file /etc/cinder/cinder.conf & echo \$! >/opt/stack/status/stack/c-vol.pid; fg || echo c-vol failed to start | tee \"/opt/stack/status/stack/c-vol.failure\" " ]) time.sleep(3) if n_cpu_screen_exists: start_screen_process('n-cpu', [ "newgrp ovs", "newgrp stack", "sg libvirtd /usr/local/bin/nova-compute --config-file /etc/nova/nova.conf & echo $! >/opt/stack/status/stack/n-cpu.pid; fg || echo n-cpu failed to start | tee \"/opt/stack/status/stack/n-cpu.failure\" " ]) time.sleep(3) if n_api_screen_exists: start_screen_process('n-api', [ "export PYTHONPATH=\"${PYTHONPATH}:/opt/OpenvStorage\" ", "/usr/local/bin/nova-api & echo $! >/opt/stack/status/stack/n-api.pid; fg || echo n-api failed to start | tee \"/opt/stack/status/stack/n-api.failure\" " ]) time.sleep(3) if c_api_screen_exists: start_screen_process('c-api', [ "/usr/local/bin/cinder-api --config-file /etc/cinder/cinder.conf & echo $! 
>/opt/stack/status/stack/c-api.pid; fg || echo c-api failed to start | tee \"/opt/stack/status/stack/c-api.failure\" " ]) time.sleep(3) except SystemExit as se: # failed command or non-zero exit codes raise SystemExit raise RuntimeError(str(se)) else: for service_name in OSManager.get_openstack_services(): if ServiceManager.has_service(service_name, self.client): try: ServiceManager.restart_service(service_name, self.client) except SystemExit as sex: self._logger.debug( 'Failed to restart service {0}. {1}'.format( service_name, sex)) time.sleep(3)
def create_cluster(cluster_name, ip, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
    """
    Creates a cluster
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param ip: IP address of the first node of the new cluster
    :type ip: str
    :param server_port: Port to be used by server
    :type server_port: int
    :param client_port: Port to be used by client
    :type client_port: int
    :return: None
    """
    EtcdInstaller._logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))

    client = SSHClient(ip, username='******')
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    if ServiceManager.has_service(target_name, client) and ServiceManager.get_service_status(target_name, client)[0] is True:
        EtcdInstaller._logger.info('Service {0} already configured and running'.format(target_name))
        return

    node_name = System.get_my_machine_id(client)
    data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip, server_port),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip, client_port),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                       'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip, server_port)),
                                       'INITIAL_STATE': 'new',
                                       'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip, server_port))},
                               target_name=target_name)

    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client, client_port=client_port)

    EtcdInstaller._logger.debug('Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))
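# Usage sketch for create_cluster() above: bootstrapping a single-node etcd cluster on one IP
# with the default server/client ports, after which extend_cluster() can add more members.
# The cluster name and IP address are illustrative.
def _example_create_etcd_cluster():
    EtcdInstaller.create_cluster(cluster_name='config', ip='10.100.1.1')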
def gather_scrub_work(): """ Retrieve and execute scrub work :return: None """ ScheduledTaskController._logger.info('Gather Scrub - Started') scrub_locations = {} for storage_driver in StorageDriverList.get_storagedrivers(): for partition in storage_driver.partitions: if DiskPartition.ROLES.SCRUB == partition.role: ScheduledTaskController._logger.info( 'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}' .format(storage_driver.storagerouter.ip, partition.path)) if storage_driver.storagerouter not in scrub_locations: try: sshclient = SSHClient(storage_driver.storagerouter) # Use ServiceManager(sshclient) to make sure ovs-workers are actually running if ServiceManager.get_service_status( 'workers', sshclient) is False: ScheduledTaskController._logger.warning( 'Gather Scrub - Storage Router {0:<15} - workers are not running' .format(storage_driver.storagerouter.ip)) else: scrub_locations[ storage_driver.storagerouter] = str( partition.path) except UnableToConnectException: ScheduledTaskController._logger.warning( 'Gather Scrub - Storage Router {0:<15} is not reachable' .format(storage_driver.storagerouter.ip)) if len(scrub_locations) == 0: raise RuntimeError('No scrub locations found') vdisk_guids = set() for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) if len(vdisk_guids) == 0: ScheduledTaskController._logger.info( 'Gather Scrub - No scrub work needed'.format(len(vdisk_guids))) return ScheduledTaskController._logger.info( 'Gather Scrub - Checking {0} volumes for scrub work'.format( len(vdisk_guids))) local_machineid = System.get_my_machine_id() local_storage_router = None local_scrub_location = None local_vdisks_to_scrub = [] result_set = {} storage_router_list = [] scrub_map = {} for index, scrub_info in enumerate(scrub_locations.items()): start_index = index * len(vdisk_guids) / len(scrub_locations) end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations) storage_router = scrub_info[0] vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index] local = storage_router.machine_id == local_machineid ScheduledTaskController._logger.info( 'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks' .format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub))) if local is True: local_storage_router = storage_router local_scrub_location = scrub_info[1] local_vdisks_to_scrub = vdisk_guids_to_scrub else: result_set[storage_router. 
ip] = ScheduledTaskController._execute_scrub_work.s( scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub).apply_async( routing_key='sr.{0}'.format( storage_router.machine_id)) storage_router_list.append(storage_router) scrub_map[storage_router.ip] = vdisk_guids_to_scrub # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish processed_guids = [] if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0: try: processed_guids = ScheduledTaskController._execute_scrub_work( scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub) except Exception as ex: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(local_storage_router.ip, ex)) all_results, failed_nodes = CeleryToolbox.manage_running_tasks( result_set, timesleep=60) # Check every 60 seconds if tasks are still running for ip, result in all_results.iteritems(): if isinstance(result, list): processed_guids.extend(result) else: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(ip, result)) result_set = {} for failed_node in failed_nodes: ScheduledTaskController._logger.warning( 'Scrubbing failed on node {0}. Will reschedule on another node.' .format(failed_node)) vdisk_guids_to_scrub = scrub_map[failed_node] rescheduled_work = False for storage_router, scrub_location in scrub_locations.items(): if storage_router.ip not in failed_nodes: if storage_router.machine_id != local_machineid: ScheduledTaskController._logger.info( 'Rescheduled scrub work from node {0} to node {1}.' .format(failed_node, storage_router.ip)) result_set[ storage_router. ip] = ScheduledTaskController._execute_scrub_work.s( scrub_location=scrub_location, vdisk_guids=vdisk_guids_to_scrub).apply_async( routing_key='sr.{0}'.format( storage_router.machine_id)) storage_router_list.append(storage_router) rescheduled_work = True break if rescheduled_work is False: if local_scrub_location is not None: try: processed_guids.extend( ScheduledTaskController._execute_scrub_work( scrub_location=local_scrub_location, vdisk_guids=vdisk_guids_to_scrub)) except Exception as ex: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}' .format(ex)) else: ScheduledTaskController._logger.warning( 'No nodes left to reschedule work from node {0}'. format(failed_node)) if len(result_set) > 0: all_results2, failed_nodes = CeleryToolbox.manage_running_tasks( result_set, timesleep=60 ) # Check every 60 seconds if tasks are still running for ip, result in all_results2.iteritems(): if isinstance(result, list): processed_guids.extend(result) else: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(ip, result)) if len(set(processed_guids)) != len(vdisk_guids) or set( processed_guids).difference(vdisk_guids): raise RuntimeError('Scrubbing failed for 1 or more storagerouters') ScheduledTaskController._logger.info('Gather Scrub - Finished')
def post_upgrade(client): """ Upgrade actions after the new packages have actually been installed :param client: SSHClient object :return: None """ # If we can reach Etcd with a valid config, and there's still an old config file present, delete it from ovs.extensions.db.etcd.configuration import EtcdConfiguration path = '/opt/OpenvStorage/config/ovs.json' if EtcdConfiguration.exists( '/ovs/framework/cluster_id') and client.file_exists(path): client.file_delete(path) # Migrate volumedriver & albaproxy configuration files import uuid from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.extensions.generic.system import System with remote(client.ip, [StorageDriverConfiguration, os, open, json, System], username='******') as rem: configuration_dir = '{0}/storagedriver/storagedriver'.format( EtcdConfiguration.get('/ovs/framework/paths|cfgdir')) host_id = rem.System.get_my_machine_id() if rem.os.path.exists(configuration_dir): for storagedriver in StorageDriverList.get_storagedrivers_by_storagerouter( rem.System.get_my_storagerouter().guid): vpool = storagedriver.vpool if storagedriver.alba_proxy is not None: config_tree = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format( vpool.guid, storagedriver.alba_proxy.guid) # ABM config abm_config = '{0}/{1}_alba.cfg'.format( configuration_dir, vpool.name) if rem.os.path.exists(abm_config): with rem.open(abm_config) as config_file: EtcdConfiguration.set( config_tree.format('abm'), config_file.read(), raw=True) rem.os.remove(abm_config) # Albaproxy config alba_config = '{0}/{1}_alba.json'.format( configuration_dir, vpool.name) if rem.os.path.exists(alba_config): with rem.open(alba_config) as config_file: config = rem.json.load(config_file) del config['albamgr_cfg_file'] config[ 'albamgr_cfg_url'] = 'etcd://127.0.0.1:2379{0}'.format( config_tree.format('abm')) EtcdConfiguration.set( config_tree.format('main'), json.dumps(config, indent=4), raw=True) params = { 'VPOOL_NAME': vpool.name, 'VPOOL_GUID': vpool.guid, 'PROXY_ID': storagedriver.alba_proxy.guid } alba_proxy_service = 'ovs-albaproxy_{0}'.format( vpool.name) ServiceManager.add_service( name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service) rem.os.remove(alba_config) # Volumedriver config current_file = '{0}/{1}.json'.format( configuration_dir, vpool.name) if rem.os.path.exists(current_file): readcache_size = 0 with rem.open(current_file) as config_file: config = rem.json.load(config_file) config['distributed_transaction_log'] = {} config['distributed_transaction_log'][ 'dtl_transport'] = config['failovercache'][ 'failovercache_transport'] config['distributed_transaction_log'][ 'dtl_path'] = config['failovercache'][ 'failovercache_path'] config['volume_manager'][ 'dtl_throttle_usecs'] = config['volume_manager'][ 'foc_throttle_usecs'] del config['failovercache'] del config['volume_manager']['foc_throttle_usecs'] sdc = rem.StorageDriverConfiguration( 'storagedriver', vpool.guid, storagedriver.storagedriver_id) sdc.configuration = config sdc.save(reload_config=False) for mountpoint in config['content_addressed_cache'][ 'clustercache_mount_points']: readcache_size += int(mountpoint['size'].replace( 'KiB', '')) params = { 'VPOOL_MOUNTPOINT': storagedriver.mountpoint, 'HYPERVISOR_TYPE': storagedriver.storagerouter.pmachine.hvtype, 'VPOOL_NAME': vpool.name, 'CONFIG_PATH': sdc.remote_path, 'UUID': str(uuid.uuid4()), 'OVS_UID': client.run('id -u ovs').strip(), 'OVS_GID': 
client.run('id -g ovs').strip(), 'KILL_TIMEOUT': str( int(readcache_size / 1024.0 / 1024.0 / 6.0 + 30)) } vmware_mode = EtcdConfiguration.get( '/ovs/framework/hosts/{0}/storagedriver|vmware_mode' .format(host_id)) dtl_service = 'ovs-dtl_{0}'.format(vpool.name) ServiceManager.add_service(name='ovs-dtl', params=params, client=client, target_name=dtl_service) if vpool.backend_type.code == 'alba': alba_proxy_service = 'ovs-albaproxy_{0}'.format( vpool.name) dependencies = [alba_proxy_service] else: dependencies = None if vmware_mode == 'ganesha': template_name = 'ovs-ganesha' else: template_name = 'ovs-volumedriver' voldrv_service = 'ovs-volumedriver_{0}'.format( vpool.name) ServiceManager.add_service( name=template_name, params=params, client=client, target_name=voldrv_service, additional_dependencies=dependencies) rem.os.remove(current_file) # Ganesha config, if available current_file = '{0}/{1}_ganesha.conf'.format( configuration_dir, vpool.name) if rem.os.path.exists(current_file): sdc = rem.StorageDriverConfiguration( 'storagedriver', vpool.guid, storagedriver.storagedriver_id) contents = '' for template in ['ganesha-core', 'ganesha-export']: contents += client.file_read( '/opt/OpenvStorage/config/templates/{0}.conf'. format(template)) params = { 'VPOOL_NAME': vpool.name, 'VPOOL_MOUNTPOINT': '/mnt/{0}'.format(vpool.name), 'CONFIG_PATH': sdc.remote_path, 'NFS_FILESYSTEM_ID': storagedriver.storagerouter.ip.split('.', 2)[-1] } for key, value in params.iteritems(): contents = contents.replace( '<{0}>'.format(key), value) client.file_write(current_file, contents)
def configure_host(self, ip): if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False: self._logger.warning( 'Configure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed' ) return # 1. Get Driver code self._logger.info('*** Configuring host with IP {0} ***'.format(ip)) self._logger.info(' Copy driver code') remote_driver = "/opt/OpenvStorage/config/templates/cinder-volume-driver/{0}/openvstorage.py".format( self._stack_version) remote_version = '0.0.0' existing_version = '0.0.0' try: from cinder.volume.drivers import openvstorage if hasattr(openvstorage, 'OVSVolumeDriver'): existing_version = getattr(openvstorage.OVSVolumeDriver, 'VERSION', '0.0.0') except ImportError: pass for line in self.client.file_read(remote_driver).splitlines(): if 'VERSION = ' in line: remote_version = line.split('VERSION = ')[-1].strip().replace( "'", "").replace('"', "") break nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._is_devstack is True: local_driver = '{0}/volume/drivers/openvstorage.py'.format( cinder_base_path) else: local_driver = '{0}/cinder/volume/drivers/openvstorage.py'.format( self._driver_location) if remote_version > existing_version: self._logger.debug( 'Updating existing driver using {0} from version {1} to version {2}' .format(remote_driver, existing_version, remote_version)) self.client.run('cp -f {0} {1}'.format(remote_driver, local_driver)) else: self._logger.debug('Using driver {0} version {1}'.format( local_driver, existing_version)) # 2. Configure users and groups self._logger.info(' Add users to group ovs') users = ['libvirt-qemu', 'stack' ] if self._is_devstack is True else self._openstack_users for user in users: self.client.run('usermod -a -G ovs {0}'.format(user)) # 3. 
Apply patches self._logger.info(' Applying patches') if self._stack_version in ('liberty', 'mitaka', 'newton'): try: import os_brick cinder_brick_initiator_file = "{0}/initiator/connector.py".format( os.path.dirname(os_brick.__file__)) except ImportError: cinder_brick_initiator_file = '' if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format( self._driver_location) else: cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format( self._driver_location) if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format( self._driver_location) if self._is_devstack is True: nova_driver_file = '{0}/virt/libvirt/driver.py'.format( nova_base_path) else: nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format( self._driver_location) self._logger.info(' Patching file {0}'.format(nova_volume_file)) file_contents = self.client.file_read(nova_volume_file) if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents: file_contents += ''' class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver): def __init__(self, connection): super(LibvirtFileVolumeDriver, self).__init__(connection, is_block_dev=False) def get_config(self, connection_info, disk_info): conf = super(LibvirtFileVolumeDriver, self).get_config(connection_info, disk_info) conf.source_type = 'file' conf.source_path = connection_info['data']['device_path'] return conf ''' self.client.file_write(nova_volume_file, file_contents) self._logger.info(' Patching file {0}'.format(nova_driver_file)) file_contents = self.client.file_read(nova_driver_file) if self._stack_version in ('liberty', 'mitaka'): check_line = 'local=nova.virt.libvirt.volume.volume.LibvirtVolumeDriver' new_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver' else: check_line = 'local=nova.virt.libvirt.volume.LibvirtVolumeDriver' new_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver' if new_line not in file_contents: for line in file_contents.splitlines(): if check_line in line: stripped_line = line.rstrip() whitespaces = len(stripped_line) - len( stripped_line.lstrip()) new_line = "{0}'{1}',\n".format(' ' * whitespaces, new_line) fc = file_contents[:file_contents. index(line)] + new_line + file_contents[ file_contents.index(line):] self.client.file_write(nova_driver_file, "".join(fc)) break if os.path.exists(cinder_brick_initiator_file): # fix brick/upload to glance self._logger.info( ' Patching file {0}'.format(cinder_brick_initiator_file)) if self._stack_version in ('liberty', 'mitaka', 'newton'): self.client.run( """sed -i 's/elif protocol == LOCAL:/elif protocol in [LOCAL, "FILE"]:/g' {0}""" .format(cinder_brick_initiator_file)) else: self.client.run( """sed -i 's/elif protocol == "LOCAL":/elif protocol in ["LOCAL", "FILE"]:/g' {0}""" .format(cinder_brick_initiator_file)) # 4. 
Configure messaging driver self._logger.info(' - Configure messaging driver') nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' with remote(ip, [RawConfigParser, open], 'root') as rem: for config_file, driver in { self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver }.iteritems(): changed = False cfg = rem.RawConfigParser() cfg.read([config_file]) if cfg.has_option("DEFAULT", "notification_driver"): if cfg.get("DEFAULT", "notification_driver") != driver: changed = True cfg.set("DEFAULT", "notification_driver", driver) else: changed = True cfg.set("DEFAULT", "notification_driver", driver) if cfg.has_option("DEFAULT", "notification_topics"): notification_topics = cfg.get( "DEFAULT", "notification_topics").split(",") if "notifications" not in notification_topics: notification_topics.append("notifications") changed = True cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics)) else: changed = True cfg.set("DEFAULT", "notification_topics", "notifications") if config_file == self._NOVA_CONF: for param, value in { 'notify_on_any_change': 'True', 'notify_on_state_change': 'vm_and_task_state' }.iteritems(): if not cfg.has_option("DEFAULT", param): changed = True cfg.set("DEFAULT", param, value) if changed is True: with rem.open(config_file, "w") as fp: cfg.write(fp) # 5. Enable events consumer self._logger.info(' - Enabling events consumer service') service_name = 'openstack-events-consumer' if not ServiceManager.has_service(service_name, self.client): ServiceManager.add_service(service_name, self.client) ServiceManager.enable_service(service_name, self.client) ServiceManager.start_service(service_name, self.client)
def unconfigure_host(self, ip): if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False: self._logger.warning( 'Unconfigure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed' ) return # 1. Remove driver code self._logger.info('*** Unconfiguring host with IP {0} ***'.format(ip)) self._logger.info(' Removing driver code') if self._is_devstack is True: self.client.file_delete(self._devstack_driver) else: self.client.file_delete( '{0}/cinder/volume/drivers/openvstorage.py'.format( self._driver_location)) # 2. Removing users from group self._logger.info(' Removing users from group ovs') for user in ['libvirt-qemu', 'stack' ] if self._is_devstack is True else self._openstack_users: self.client.run('deluser {0} ovs'.format(user)) # 3. Revert patches self._logger.info(' Reverting patches') nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format( nova_base_path) nova_driver_file = '{0}/virt/libvirt/driver.py'.format( nova_base_path) cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format( cinder_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format( self._driver_location) nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format( self._driver_location) cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format( self._driver_location) self._logger.info( ' Reverting patched file: {0}'.format(nova_volume_file)) new_contents = [] skip_class = False for line in self.client.file_read(nova_volume_file).splitlines(): if line.startswith( 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'): skip_class = True continue if line.startswith('class'): skip_class = False if skip_class is False: new_contents.append(line) self.client.file_write(nova_volume_file, "".join(new_contents)) self._logger.info( ' Reverting patched file: {0}'.format(nova_driver_file)) new_contents = [] for line in self.client.file_read(nova_driver_file).splitlines(): stripped_line = line.strip() if stripped_line.startswith( "'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'"): continue new_contents.append(line) self.client.file_write(nova_driver_file, "".join(new_contents)) if os.path.exists(cinder_brick_initiator_file): self._logger.info(' Reverting patched file: {0}'.format( cinder_brick_initiator_file)) self.client.run( """sed -i 's/elif protocol in ["LOCAL", "FILE"]:/elif protocol == "LOCAL":/g' {0}""" .format(cinder_brick_initiator_file)) # 4. 
Unconfigure messaging driver self._logger.info(' Unconfiguring messaging driver') nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' with remote(ip, [RawConfigParser, open], 'root') as rem: for config_file, driver in { self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver }.iteritems(): cfg = rem.RawConfigParser() cfg.read([config_file]) if cfg.has_option("DEFAULT", "notification_driver"): cfg.remove_option("DEFAULT", "notification_driver") if cfg.has_option("DEFAULT", "notification_topics"): notification_topics = cfg.get( "DEFAULT", "notification_topics").split(",") if "notifications" in notification_topics: notification_topics.remove("notifications") cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics)) if config_file == self._NOVA_CONF: for param, value in { 'notify_on_any_change': 'True', 'notify_on_state_change': 'vm_and_task_state' }.iteritems(): if cfg.has_option("DEFAULT", param): cfg.remove_option("DEFAULT", param) with rem.open(config_file, "w") as fp: cfg.write(fp) # 5. Disable events consumer self._logger.info(' Disabling events consumer') service_name = 'ovs-openstack-events-consumer' if ServiceManager.has_service(service_name, self.client): ServiceManager.stop_service(service_name, self.client) ServiceManager.disable_service(service_name, self.client) ServiceManager.remove_service(service_name, self.client)
def _deploy(config, filesystem, offline_nodes=None):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        if filesystem is True:
            raise NotImplementedError('At this moment, there is no support for unittesting filesystem backend Arakoon clusters')

    ArakoonInstaller._logger.debug('Deploying cluster {0}'.format(config.cluster_id))
    if offline_nodes is None:
        offline_nodes = []
    for node in config.nodes:
        if node.ip in offline_nodes:
            continue
        ArakoonInstaller._logger.debug('  Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
        root_client = SSHClient(node.ip, username='******')

        # Distributes a configuration file to all its nodes
        config.write_config(node.ip)

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = {node.tlog_dir, node.home}  # That's a set
        if node.log_sinks.startswith('/'):
            abs_paths.add(os.path.dirname(os.path.abspath(node.log_sinks)))
        if node.crash_log_sinks.startswith('/'):
            abs_paths.add(os.path.dirname(os.path.abspath(node.crash_log_sinks)))
        abs_paths = list(abs_paths)
        root_client.dir_create(abs_paths)
        root_client.dir_chmod(abs_paths, 0755, recursive=True)
        root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        # Creates services for/on all nodes in the config
        if config.filesystem is True:
            config_path = config.config_path
        else:
            config_path = Configuration.get_configuration_path(config.config_path)
        base_name = 'ovs-arakoon'
        target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
        ServiceManager.add_service(base_name, root_client,
                                   params={'CLUSTER': config.cluster_id,
                                           'NODE_ID': node.name,
                                           'CONFIG_PATH': config_path,
                                           'STARTUP_DEPENDENCY': 'started ovs-watcher-config' if filesystem is False else '(local-filesystems and started networking)'},
                                   target_name=target_name)
        ArakoonInstaller._logger.debug('  Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))
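# Sketch of the per-node service parameters _deploy() above passes to ServiceManager.add_service():
# the STARTUP_DEPENDENCY differs depending on whether the cluster configuration lives on the
# filesystem or in the configuration store. Only the keys come from the code above; the helper
# itself and its arguments are illustrative.
def _example_arakoon_service_params(cluster_id, node_name, config_path, filesystem):
    return {'CLUSTER': cluster_id,
            'NODE_ID': node_name,
            'CONFIG_PATH': config_path,
            'STARTUP_DEPENDENCY': '(local-filesystems and started networking)' if filesystem is True
                                  else 'started ovs-watcher-config'}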
def is_host_configured(self, ip): if ( self._is_devstack is False and self._is_openstack is False ) or self._cinder_installed is False or self._nova_installed is False: self._logger.warning( 'Host configured: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed' ) return False # 1. Check driver code if self._is_devstack is True: if not self.client.file_exists(filename=self._devstack_driver): self._logger.info(' File "{0}" does not exist'.format( self._devstack_driver)) return False else: if not self.client.file_exists( filename='{0}/cinder/volume/drivers/openvstorage.py'. format(self._driver_location)): self._logger.info( ' File "{0}/cinder/volume/drivers/openvstorage.py" does not exist' .format(self._driver_location)) return False # 2. Check configured users ovs_id = self.client.run('id -u ovs') if not ovs_id: self._logger.info('Failed to determine the OVS user group ID') return False users = ['libvirt-qemu', 'stack' ] if self._is_devstack is True else self._openstack_users for user in users: if '{0}(ovs)'.format(ovs_id) not in self.client.run( 'id -a {0}'.format(user)): self._logger.info( 'User "{0}" is not part of the OVS user group') return False # 3. Check patches nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._stack_version in ('liberty', 'mitaka', 'newton'): try: import os_brick cinder_brick_initiator_file = "{0}/initiator/connector.py".format( os.path.dirname(os_brick.__file__)) except ImportError: cinder_brick_initiator_file = '' if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format( self._driver_location) else: if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format( self._driver_location) cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format( cinder_base_path) if self._is_devstack is True: nova_driver_file = '{0}/virt/libvirt/driver.py'.format( nova_base_path) else: nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format( self._driver_location) file_contents = self.client.file_read(nova_volume_file) if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents: self._logger.info('File "{0}" is not configured properly'.format( nova_volume_file)) return False if self._stack_version in ('liberty', 'mitaka'): check_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver' else: check_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver' file_contents = self.client.file_read(nova_driver_file) if check_line not in file_contents: self._logger.info('File "{0}" is not configured properly'.format( nova_driver_file)) return False if os.path.exists(cinder_brick_initiator_file): file_contents = self.client.file_read(cinder_brick_initiator_file) if self._stack_version in ('liberty', 'mitaka', 'newton'): if 'elif protocol in [LOCAL, "FILE"]:' not in file_contents: self._logger.info( 'File "{0}" is not configured properly'.format( cinder_brick_initiator_file)) return False else: if 'elif protocol in ["LOCAL", "FILE"]:' not in file_contents: self._logger.info( 'File "{0}" is not configured properly'.format( cinder_brick_initiator_file)) return False # 4. 
Check messaging driver configuration nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' host_configured = True with remote(ip, [RawConfigParser], 'root') as rem: for config_file, driver in { self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver }.iteritems(): cfg = rem.RawConfigParser() cfg.read([config_file]) host_configured &= cfg.get("DEFAULT", "notification_driver") == driver host_configured &= "notifications" in cfg.get( "DEFAULT", "notification_topics") if config_file == self._NOVA_CONF: host_configured &= cfg.get( "DEFAULT", "notify_on_any_change") == "True" host_configured &= cfg.get( "DEFAULT", "notify_on_state_change") == "vm_and_task_state" if host_configured is False: self._logger.info( 'Nova and/or Cinder configuration files are not configured properly' ) return host_configured # 5. Check events consumer service service_name = 'ovs-openstack-events-consumer' if not (ServiceManager.has_service(service_name, self.client) and ServiceManager.get_service_status(service_name, self.client) is True): self._logger.info( 'Service "{0}" is not configured properly'.format( service_name)) return False return True
def execute_scrub_work(queue, vpool, scrub_info, error_messages):
    """
    Executes scrub work for a given vDisk queue and vPool, based on scrub_info
    :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
    :type queue: Queue
    :param vpool: the vPool object of the vDisks
    :type vpool: VPool
    :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter that needs to do the work
    :type scrub_info: dict
    :param error_messages: A list of error messages to be filled (this list is mutated in place)
    :type error_messages: list
    :return: None
    """
    def _verify_mds_config(current_vdisk):
        current_vdisk.invalidate_dynamics('info')
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    client = None
    lock_time = 5 * 60
    storagerouter = scrub_info['storage_router']
    scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
    scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
    backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
    alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

    # Deploy a proxy
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_create(scrub_directory)
            client.dir_chmod(scrub_directory, 0777)  # Celery task is executed by the 'ovs' user, which should be able to write in the directory
            if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                scrub_config = Configuration.get(scrub_config_key)
            else:
                machine_id = System.get_my_machine_id(client)
                port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                # Scrub config
                # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                #  u'fragment_cache': [u'none'],
                #  u'ips': [u'127.0.0.1'],
                #  u'log_level': u'info',
                #  u'manifest_cache_size': 17179869184,
                #  u'port': 0,
                #  u'transport': u'tcp'}

                # Backend config
                # {u'alba_connection_host': u'10.100.193.155',
                #  u'alba_connection_port': 26204,
                #  u'alba_connection_preset': u'preset',
                #  u'alba_connection_timeout': 15,
                #  u'alba_connection_transport': u'TCP',
                #  u'backend_interface_retries_on_error': 5,
                #  u'backend_interface_retry_backoff_multiplier': 2.0,
                #  u'backend_interface_retry_interval_secs': 1,
                #  u'backend_type': u'ALBA'}
                scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                scrub_config['port'] = port
                scrub_config['transport'] = 'tcp'
                Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                params = {'VPOOL_NAME': vpool.name,
                          'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                          'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                ServiceManager.start_service(name=alba_proxy_service, client=client)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

            backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
            backend_config['alba_connection_host'] = '127.0.0.1'
            backend_config['alba_connection_port'] = scrub_config['port']
            Configuration.set(backend_config_key, json.dumps({'backend_connection_manager': backend_config}, indent=4), raw=True)
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
        if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
            if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ServiceManager.stop_service(name=alba_proxy_service, client=client)
            ServiceManager.remove_service(name=alba_proxy_service, client=client)
        if Configuration.exists(scrub_config_key):
            Configuration.delete(scrub_config_key)

    try:
        # Empty the queue with vDisks to scrub
        with remote(storagerouter.ip, [VDisk]) as rem:
            while True:
                vdisk = None
                vdisk_guid = queue.get(False)
                try:
                    # Check MDS master is local. Trigger MDS handover if necessary
                    vdisk = rem.VDisk(vdisk_guid)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                        MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                            continue

                    # Do the actual scrubbing
                    with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                        work_units = locked_client.get_scrubbing_workunits()
                        for work_unit in work_units:
                            res = locked_client.scrub(work_unit=work_unit,
                                                      scratch_dir=scrub_directory,
                                                      log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                      backend_config=Configuration.get_configuration_path(backend_config_key))
                            locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                        if work_units:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                        else:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                except Exception:
                    if vdisk is None:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                    else:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                    error_messages.append(message)
                    ScheduledTaskController._logger.exception(message)
    except Empty:  # Raised when all items have been fetched from the queue
        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)

    # Delete the proxy again
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_delete(scrub_directory)
            if ServiceManager.has_service(alba_proxy_service, client=client):
                ServiceManager.stop_service(alba_proxy_service, client=client)
                ServiceManager.remove_service(alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
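# execute_scrub_work() drains the shared queue until Queue.Empty is raised, so one queue can be
# handed to several scrub locations in parallel. The sketch below shows that dispatch pattern with
# stdlib threads; the function name and the thread-per-scrub-location layout are assumptions for
# illustration, not necessarily how the scheduled task wires it up.
from Queue import Queue  # Python 2 stdlib
from threading import Thread


def scrub_vpool_in_parallel(vpool, vdisk_guids, scrub_infos):
    """Scrub all given vDisks of one vPool, spreading the queue over all scrub locations."""
    error_messages = []
    vdisk_queue = Queue()
    for vdisk_guid in vdisk_guids:
        vdisk_queue.put(vdisk_guid)
    threads = []
    for scrub_info in scrub_infos:  # One entry per scrub location/StorageRouter
        thread = Thread(target=execute_scrub_work,
                        args=(vdisk_queue, vpool, scrub_info, error_messages))
        thread.start()
        threads.append(thread)
    for thread in threads:  # Wait until the queue has been processed by all workers
        thread.join()
    return error_messages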
def extend_cluster(master_ip, new_ip, cluster_name, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT): """ Extends a cluster to a given new node :param master_ip: IP of one of the already existing nodes :type master_ip: str :param new_ip: IP address of the node to be added :type new_ip: str :param cluster_name: Name of the cluster to be extended :type cluster_name: str :param server_port: Port to be used by server :type server_port: int :param client_port: Port to be used by client :type client_port: int """ EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip)) master_client = SSHClient(master_ip, username='******') if not EtcdInstaller._is_healty(cluster_name, master_client, client_port=client_port): raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name)) command = 'etcdctl member list' new_server_url = EtcdInstaller.SERVER_URL.format(new_ip, server_port) if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT: command = 'etcdctl --peers={0}:{1} member list'.format(master_ip, client_port) cluster_members = master_client.run(command).splitlines() for cluster_member in cluster_members: if new_server_url in cluster_member: EtcdInstaller._logger.info('Node {0} already member of etcd cluster'.format(new_ip)) return current_cluster = [] for item in cluster_members: info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict() current_cluster.append('{0}={1}'.format(info['name'], info['peer'])) new_client = SSHClient(new_ip, username='******') node_name = System.get_my_machine_id(new_client) current_cluster.append('{0}={1}'.format(node_name, new_server_url)) data_dir = EtcdInstaller.DATA_DIR.format(cluster_name) wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name) abs_paths = [data_dir, wal_dir] new_client.dir_delete(abs_paths) new_client.dir_create(abs_paths) new_client.dir_chmod(abs_paths, 0755, recursive=True) new_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True) base_name = 'ovs-etcd' target_name = 'ovs-etcd-{0}'.format(cluster_name) EtcdInstaller.stop(cluster_name, new_client) # Stop a possible proxy service ServiceManager.add_service(base_name, new_client, params={'CLUSTER': cluster_name, 'NODE_ID': node_name, 'DATA_DIR': data_dir, 'WAL_DIR': wal_dir, 'SERVER_URL': new_server_url, 'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip, client_port), 'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port), 'INITIAL_CLUSTER': ','.join(current_cluster), 'INITIAL_STATE': 'existing', 'INITIAL_PEERS': ''}, target_name=target_name) add_command = 'etcdctl member add {0} {1}'.format(node_name, new_server_url) if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT: add_command = 'etcdctl --peers={0}:{1} member add {2} {3}'.format(master_ip, client_port, node_name, new_server_url) master_client.run(add_command) EtcdInstaller.start(cluster_name, new_client) EtcdInstaller.wait_for_cluster(cluster_name, new_client, client_port=client_port) EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
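# extend_cluster() rebuilds the INITIAL_CLUSTER value from `etcdctl member list` output via
# EtcdInstaller.MEMBER_REGEX. The sketch below shows that parsing step in isolation, using an
# illustrative regex and helper name; the real MEMBER_REGEX is defined elsewhere in EtcdInstaller
# and may differ.
import re

EXAMPLE_MEMBER_REGEX = re.compile(r'^(?P<id>[^:]+): name=(?P<name>\S+) peerURLs=(?P<peer>\S+) clientURLs=(?P<client>\S*)')


def parse_member_list(output):
    """Turn `etcdctl member list` output into 'name=peer_url' entries for an initial cluster string."""
    cluster = []
    for line in output.splitlines():
        match = EXAMPLE_MEMBER_REGEX.search(line)
        if match is None:  # Skip lines that do not describe a started member
            continue
        info = match.groupdict()
        cluster.append('{0}={1}'.format(info['name'], info['peer']))
    return cluster


# Example:
#   parse_member_list('8e9e05c52164694d: name=node1 peerURLs=http://10.0.0.1:2380 clientURLs=http://10.0.0.1:2379 isLeader=true')
#   -> ['node1=http://10.0.0.1:2380']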