def add_services(client, node_type, logger):
    """
    Add the services required by the OVS cluster
    :param client: Client on which to add the services
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param node_type: Type of node ('master' or 'extra')
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :return: None
    """
    Toolbox.log(logger=logger, messages='Adding services')
    services = {}
    worker_queue = System.get_my_machine_id(client=client)
    if node_type == 'master':
        worker_queue += ',ovs_masters'
        services.update({'memcached': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                         'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                         'scheduled-tasks': {},
                         'webapp-api': {},
                         'volumerouter-consumer': {}})
    services.update({'workers': {'WORKER_QUEUE': worker_queue},
                     'watcher-framework': {}})
    for service_name, params in services.iteritems():
        if not ServiceManager.has_service(service_name, client):
            Toolbox.log(logger=logger, messages='Adding service {0}'.format(service_name))
            ServiceManager.add_service(name=service_name, params=params, client=client)

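# Hedged sketch (not part of the module above): the worker queue is the node's
# machine id, with the shared 'ovs_masters' queue appended on master nodes.
# A pure-Python illustration of that rule; the machine id value is made up.
def build_worker_queue(machine_id, node_type):
    queue = machine_id
    if node_type == 'master':
        queue += ',ovs_masters'
    return queue

assert build_worker_queue('1f2e3d', 'master') == '1f2e3d,ovs_masters'
assert build_worker_queue('1f2e3d', 'extra') == '1f2e3d'
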
def _deploy(config):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    logger.debug('Deploying cluster {0}'.format(config.cluster_id))
    for node in config.nodes:
        logger.debug(' Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
        ovs_client = SSHClient(node.ip)
        root_client = SSHClient(node.ip, username='******')

        # Distributes a configuration file to all its nodes
        config.write_config(ovs_client)

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = [node.log_dir, node.tlog_dir, node.home]
        root_client.dir_create(abs_paths)
        root_client.dir_chmod(abs_paths, 0755, recursive=True)
        root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        # Creates services for/on all nodes in the config
        base_name = 'ovs-arakoon'
        target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
        ServiceManager.prepare_template(base_name, target_name, ovs_client)
        ServiceManager.add_service(target_name, root_client, params={'CLUSTER': config.cluster_id})
        logger.debug(' Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))

def _deploy(config, offline_nodes=None):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    ArakoonInstaller._logger.debug('Deploying cluster {0}'.format(config.cluster_id))
    if offline_nodes is None:
        offline_nodes = []
    for node in config.nodes:
        if node.ip in offline_nodes:
            continue
        ArakoonInstaller._logger.debug(' Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
        root_client = SSHClient(node.ip, username='******')

        # Distributes a configuration file to all its nodes
        config.write_config()

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = [node.log_dir, node.tlog_dir, node.home]
        if not root_client.dir_exists(abs_paths):
            root_client.dir_create(abs_paths)
            root_client.dir_chmod(abs_paths, 0755, recursive=True)
            root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        # Creates services for/on all nodes in the config
        base_name = 'ovs-arakoon'
        target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
        ServiceManager.add_service(base_name, root_client,
                                   params={'CLUSTER': config.cluster_id,
                                           'NODE_ID': node.name,
                                           'CONFIG_PATH': ArakoonInstaller.ETCD_CONFIG_PATH.format(config.cluster_id)},
                                   target_name=target_name)
        ArakoonInstaller._logger.debug(' Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))

def _setup_proxy(initial_cluster, slave_client, cluster_name):
    base_name = 'ovs-etcd-proxy'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, slave_client)

    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    slave_client.dir_delete(abs_paths)
    slave_client.dir_create(data_dir)
    slave_client.dir_chmod(data_dir, 0755, recursive=True)
    slave_client.dir_chown(data_dir, 'ovs', 'ovs', recursive=True)

    ServiceManager.add_service(base_name, slave_client,
                               params={'CLUSTER': cluster_name,
                                       'DATA_DIR': data_dir,
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': initial_cluster},
                               target_name=target_name)
    EtcdInstaller.start(cluster_name, slave_client)
    EtcdInstaller.wait_for_cluster(cluster_name, slave_client)

def _setup_proxy(initial_cluster, slave_client, cluster_name, force=False):
    base_name = 'ovs-etcd-proxy'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    if force is False and ServiceManager.has_service(target_name, slave_client) and \
            ServiceManager.get_service_status(target_name, slave_client) is True:
        logger.info('Service {0} already configured and running'.format(target_name))
        return
    EtcdInstaller.stop(cluster_name, slave_client)

    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    slave_client.dir_delete(abs_paths)
    slave_client.dir_create(data_dir)
    slave_client.dir_chmod(data_dir, 0755, recursive=True)
    slave_client.dir_chown(data_dir, 'ovs', 'ovs', recursive=True)

    ServiceManager.add_service(base_name, slave_client,
                               params={'CLUSTER': cluster_name,
                                       'DATA_DIR': data_dir,
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': initial_cluster},
                               target_name=target_name)
    EtcdInstaller.start(cluster_name, slave_client)
    EtcdInstaller.wait_for_cluster(cluster_name, slave_client)

def extend_cluster(master_ip, new_ip, cluster_name):
    """
    Extends a cluster to a given new node
    :param cluster_name: Name of the cluster to be extended
    :param new_ip: IP address of the node to be added
    :param master_ip: IP of one of the already existing nodes
    """
    logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

    client = SSHClient(master_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, client):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

    cluster_members = client.run('etcdctl member list').splitlines()
    for cluster_member in cluster_members:
        if EtcdInstaller.SERVER_URL.format(new_ip) in cluster_member:
            logger.info('Node {0} already member of etcd cluster'.format(new_ip))
            return

    current_cluster = []
    for item in client.run('etcdctl member list').splitlines():
        info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
        current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

    client = SSHClient(new_ip, username='******')
    node_name = System.get_my_machine_id(client)
    current_cluster.append('{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))

    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, client)  # Stop a possible proxy service
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': ','.join(current_cluster),
                                       'INITIAL_STATE': 'existing',
                                       'INITIAL_PEERS': ''},
                               target_name=target_name)

    master_client = SSHClient(master_ip, username='******')
    master_client.run('etcdctl member add {0} {1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client)

    logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))

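# Hedged sketch: EtcdInstaller.MEMBER_REGEX is referenced above but not defined
# in these snippets. Against etcd2-style 'etcdctl member list' output, a pattern
# like the one below (an assumption, not the actual constant) yields the 'name'
# and 'peer' groups used to rebuild the INITIAL_CLUSTER string.
import re

MEMBER_REGEX = r'^(?P<id>[^:]+): name=(?P<name>\S+) peerURLs=(?P<peer>\S+)'  # assumed pattern
line = 'ce2a822cea30bfca: name=node1 peerURLs=http://10.0.0.1:2380 clientURLs=http://10.0.0.1:2379'
info = re.search(MEMBER_REGEX, line).groupdict()
assert '{0}={1}'.format(info['name'], info['peer']) == 'node1=http://10.0.0.1:2380'
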
def create_cluster(cluster_name, ip):
    """
    Creates a cluster
    :param cluster_name: Name of the cluster
    :param ip: IP address of the first node of the new cluster
    """
    logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))

    client = SSHClient(ip, username='******')
    node_name = System.get_my_machine_id(client)

    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip)),
                                       'INITIAL_STATE': 'new',
                                       'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip))},
                               target_name=target_name)

    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client)

    logger.debug('Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))

def _enable_openstack_events_consumer(self):
    """
    Enable service ovs-openstack-events-consumer
    """
    from ovs.extensions.services.service import ServiceManager
    service_name = 'ovs-openstack-events-consumer'
    if not ServiceManager.has_service(service_name, self.client):
        ServiceManager.add_service(service_name, self.client)
        ServiceManager.enable_service(service_name, self.client)
        ServiceManager.start_service(service_name, self.client)

def _roll_out_dtl_services(vpool, storagerouters):
    """
    Deploy and start the DTL service on all storagerouters
    :param vpool: vPool to deploy the DTL service for
    :param storagerouters: StorageRouters to deploy and start a DTL service on
    :return: None
    """
    service_name = 'dtl_{0}'.format(vpool.name)
    for sr in storagerouters.values():
        client = SSHClient(sr, 'root')
        ServiceManager.add_service(name=service_name, client=client)
        ServiceManager.start_service(name=service_name, client=client)

def create_cluster(cluster_name, ip, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
    """
    Creates a cluster
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param ip: IP address of the first node of the new cluster
    :type ip: str
    :param server_port: Port to be used by server
    :type server_port: int
    :param client_port: Port to be used by client
    :type client_port: int
    :return: None
    """
    EtcdInstaller._logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))

    client = SSHClient(ip, username='******')
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    if ServiceManager.has_service(target_name, client) and ServiceManager.get_service_status(target_name, client) is True:
        EtcdInstaller._logger.info('Service {0} already configured and running'.format(target_name))
        return
    node_name = System.get_my_machine_id(client)

    data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip, server_port),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip, client_port),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                       'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip, server_port)),
                                       'INITIAL_STATE': 'new',
                                       'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip, server_port))},
                               target_name=target_name)

    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client, client_port=client_port)

    EtcdInstaller._logger.debug('Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))

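# Hedged sketch: SERVER_URL, CLIENT_URL and the DEFAULT_* ports are class
# attributes of EtcdInstaller whose definitions are not shown in these
# snippets. Plausible values, matching etcd's conventional peer (2380) and
# client (2379) ports, would be (all of these are assumptions):
SERVER_URL = 'http://{0}:{1}'   # assumed peer URL template
CLIENT_URL = 'http://{0}:{1}'   # assumed client URL template
DEFAULT_SERVER_PORT = 2380      # assumed default peer port
DEFAULT_CLIENT_PORT = 2379      # assumed default client port
assert SERVER_URL.format('10.0.0.1', DEFAULT_SERVER_PORT) == 'http://10.0.0.1:2380'
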
def _deploy(config, filesystem, offline_nodes=None):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        if filesystem is True:
            raise NotImplementedError('At this moment, there is no support for unittesting filesystem backend Arakoon clusters')

    ArakoonInstaller._logger.debug('Deploying cluster {0}'.format(config.cluster_id))
    if offline_nodes is None:
        offline_nodes = []
    for node in config.nodes:
        if node.ip in offline_nodes:
            continue
        ArakoonInstaller._logger.debug(' Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
        root_client = SSHClient(node.ip, username='******')

        # Distributes a configuration file to all its nodes
        config.write_config(node.ip)

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = {node.tlog_dir, node.home}  # That's a set
        if node.log_sinks.startswith('/'):
            abs_paths.add(os.path.dirname(os.path.abspath(node.log_sinks)))
        if node.crash_log_sinks.startswith('/'):
            abs_paths.add(os.path.dirname(os.path.abspath(node.crash_log_sinks)))
        abs_paths = list(abs_paths)
        root_client.dir_create(abs_paths)
        root_client.dir_chmod(abs_paths, 0755, recursive=True)
        root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        # Creates services for/on all nodes in the config
        if config.filesystem is True:
            config_path = config.config_path
        else:
            config_path = Configuration.get_configuration_path(config.config_path)
        base_name = 'ovs-arakoon'
        target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
        ServiceManager.add_service(base_name, root_client,
                                   params={'CLUSTER': config.cluster_id,
                                           'NODE_ID': node.name,
                                           'CONFIG_PATH': config_path,
                                           'STARTUP_DEPENDENCY': 'started ovs-watcher-config' if filesystem is False else '(local-filesystems and started networking)'},
                                   target_name=target_name)
        ArakoonInstaller._logger.debug(' Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))

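# Hedged sketch of the sink handling above: Arakoon log sinks may be absolute
# file paths or special targets such as 'console:'; only absolute paths
# contribute their parent directory to the set of directories to create.
import os

def sink_parent_dirs(*sinks):
    return sorted(set(os.path.dirname(os.path.abspath(s)) for s in sinks if s.startswith('/')))

assert sink_parent_dirs('/var/log/arakoon/ovsdb/ovsdb.log', 'console:') == ['/var/log/arakoon/ovsdb']
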
def _setup_proxy(initial_cluster, slave_client, cluster_name, force=False, client_port=DEFAULT_CLIENT_PORT):
    base_name = 'ovs-etcd-proxy'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    if force is False and ServiceManager.has_service(target_name, slave_client) and \
            ServiceManager.get_service_status(target_name, slave_client)[0] is True:
        EtcdInstaller._logger.info('Service {0} already configured and running'.format(target_name))
        return
    EtcdInstaller.stop(cluster_name, slave_client)

    data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
    abs_paths = [data_dir, wal_dir]
    slave_client.dir_delete(abs_paths)
    slave_client.dir_create(data_dir)
    slave_client.dir_chmod(data_dir, 0755, recursive=True)
    slave_client.dir_chown(data_dir, 'ovs', 'ovs', recursive=True)

    ServiceManager.add_service(base_name, slave_client,
                               params={'CLUSTER': cluster_name,
                                       'DATA_DIR': data_dir,
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                       'INITIAL_CLUSTER': initial_cluster},
                               target_name=target_name)
    EtcdInstaller.start(cluster_name, slave_client)
    EtcdInstaller.wait_for_cluster(cluster_name, slave_client, client_port=client_port)

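# Hedged sketch of the guard above: proxy setup is skipped only when the
# service already exists and its status reports running, unless force is set.
def proxy_setup_needed(force, has_service, is_running):
    return force or not (has_service and is_running)

assert proxy_setup_needed(False, True, True) is False
assert proxy_setup_needed(True, True, True) is True
assert proxy_setup_needed(False, True, False) is True
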
def _deploy(config, filesystem, offline_nodes=None, plugins=None, delay_service_registration=False):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        if filesystem is True:
            raise NotImplementedError('At this moment, there is no support for unit-testing filesystem backend Arakoon clusters')

    ArakoonInstaller._logger.debug('Deploying cluster {0}'.format(config.cluster_id))
    if offline_nodes is None:
        offline_nodes = []

    service_metadata = {}
    for node in config.nodes:
        if node.ip in offline_nodes:
            continue
        ArakoonInstaller._logger.debug(' Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
        root_client = SSHClient(node.ip, username='******')

        # Distributes a configuration file to all its nodes
        config.write_config(node.ip)

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = {node.tlog_dir, node.home}  # That's a set
        if node.log_sinks.startswith('/'):
            abs_paths.add(os.path.dirname(os.path.abspath(node.log_sinks)))
        if node.crash_log_sinks.startswith('/'):
            abs_paths.add(os.path.dirname(os.path.abspath(node.crash_log_sinks)))
        abs_paths = list(abs_paths)
        root_client.dir_create(abs_paths)
        root_client.dir_chmod(abs_paths, 0755, recursive=True)
        root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        # Creates services for/on all nodes in the config
        if config.filesystem is True:
            config_path = config.config_path
        else:
            config_path = Configuration.get_configuration_path(config.config_path)
        extra_version_cmd = ''
        if plugins is not None:
            extra_version_cmd = ';'.join(plugins)
        metadata = ServiceManager.add_service(name='ovs-arakoon',
                                              client=root_client,
                                              params={'CLUSTER': config.cluster_id,
                                                      'NODE_ID': node.name,
                                                      'CONFIG_PATH': config_path,
                                                      'EXTRA_VERSION_CMD': extra_version_cmd},
                                              target_name='ovs-arakoon-{0}'.format(config.cluster_id),
                                              startup_dependency=('ovs-watcher-config' if filesystem is False else None),
                                              delay_registration=delay_service_registration)
        service_metadata[node.ip] = metadata
        ArakoonInstaller._logger.debug(' Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))
    return service_metadata

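# Hedged sketch: EXTRA_VERSION_CMD above is simply the plugin list joined with
# ';' before being handed to the service template. The plugin names below are
# made up for illustration.
plugins = ['albamgr_plugin', 'nsm_host_plugin']
extra_version_cmd = ';'.join(plugins) if plugins is not None else ''
assert extra_version_cmd == 'albamgr_plugin;nsm_host_plugin'
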
def _deploy(config, offline_nodes=None):
    """
    Deploys a complete cluster: Distributing the configuration files, creating directories and services
    """
    logger.debug('Deploying cluster {0}'.format(config.cluster_id))
    if offline_nodes is None:
        offline_nodes = []
    for node in config.nodes:
        if node.ip in offline_nodes:
            continue
        logger.debug(' Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
        root_client = SSHClient(node.ip, username='******')

        # Distributes a configuration file to all its nodes
        config.write_config()

        # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
        abs_paths = [node.log_dir, node.tlog_dir, node.home]
        root_client.dir_create(abs_paths)
        root_client.dir_chmod(abs_paths, 0755, recursive=True)
        root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        # Creates services for/on all nodes in the config
        base_name = 'ovs-arakoon'
        target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
        ServiceManager.add_service(base_name, root_client,
                                   params={'CLUSTER': config.cluster_id,
                                           'NODE_ID': node.name,
                                           'CONFIG_PATH': ArakoonInstaller.ETCD_CONFIG_PATH.format(config.cluster_id)},
                                   target_name=target_name)
        logger.debug(' Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))

def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
    """
    Demotes a given node
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
    if offline_nodes is None:
        offline_nodes = []

    if unconfigure_memcached is True and len(offline_nodes) == 0:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'EXTRA'
    storagerouter.save()

    offline_node_ips = [node.ip for node in offline_nodes]
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                        remaining_node_ips=master_node_ips,
                                        cluster_name=arakoon_cluster_name,
                                        offline_nodes=offline_node_ips)
    try:
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                remaining_node_ips=master_node_ips,
                                                cluster_name='config',
                                                offline_nodes=offline_node_ips,
                                                filesystem=True)
            else:
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    try:
        if unconfigure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 11211)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if unconfigure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 5672)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        remaining_nodes = ip_client_map.keys()[:]
        if cluster_ip in remaining_nodes:
            remaining_nodes.remove(cluster_ip)

        PersistentFactory.store = None
        VolatileFactory.store = None

        for service in storagerouter.services:
            if service.name == 'arakoon-ovsdb':
                service.delete()

    target_client = None
    if storagerouter in offline_nodes:
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
            client = ip_client_map[master_ip]
            try:
                client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
    else:
        target_client = ip_client_map[cluster_ip]
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
            try:
                if ServiceManager.has_service('rabbitmq-server', client=target_client):
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                    target_client.run(['rabbitmq-server', '-detached'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop_app'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'reset'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop'])
                    time.sleep(5)
                    target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
        services = ['memcached', 'rabbitmq-server']
        if unconfigure_rabbitmq is False:
            services.remove('rabbitmq-server')
        if unconfigure_memcached is False:
            services.remove('memcached')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service {0}'.format(service), ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
        services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    ServiceManager.remove_service(service, client=target_client)
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service {0}'.format(service), ex], loglevel='exception')

        if ServiceManager.has_service('workers', client=target_client):
            ServiceManager.add_service(name='workers',
                                       client=target_client,
                                       params={'WORKER_QUEUE': '{0}'.format(unique_id)})
    try:
        NodeTypeController._configure_amqp_to_volumedriver()
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if Toolbox.run_hooks(component='nodetype',
                         sub_component='demote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip,
                         offline_node_ips=offline_node_ips):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if storagerouter not in offline_nodes:
        target_client = ip_client_map[cluster_ip]
        node_name, _ = target_client.get_hostname()
        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

    if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_write('/tmp/ovs_rollback', 'rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)

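# Hedged sketch of the endpoint cleanup above, as a pure helper: drop this
# node's '<ip>:<port>' entry from an endpoint list, leaving others untouched.
def without_endpoint(endpoints, ip, port):
    endpoint = '{0}:{1}'.format(ip, port)
    return [e for e in endpoints if e != endpoint]

assert without_endpoint(['10.0.0.1:11211', '10.0.0.2:11211'], '10.0.0.2', 11211) == ['10.0.0.1:11211']
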
def post_upgrade(client):
    """
    Upgrade actions after the new packages have actually been installed
    :param client: SSHClient object
    :return: None
    """
    # If we can reach Etcd with a valid config, and there's still an old config file present, delete it
    from ovs.extensions.db.etcd.configuration import EtcdConfiguration
    path = '/opt/OpenvStorage/config/ovs.json'
    if EtcdConfiguration.exists('/ovs/framework/cluster_id') and client.file_exists(path):
        client.file_delete(path)

    # Migrate volumedriver & albaproxy configuration files
    import uuid
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.extensions.generic.system import System
    with remote(client.ip, [StorageDriverConfiguration, os, open, json, System], username='******') as rem:
        configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
        host_id = rem.System.get_my_machine_id()
        if rem.os.path.exists(configuration_dir):
            for storagedriver in StorageDriverList.get_storagedrivers_by_storagerouter(rem.System.get_my_storagerouter().guid):
                vpool = storagedriver.vpool
                if storagedriver.alba_proxy is not None:
                    config_tree = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format(vpool.guid, storagedriver.alba_proxy.guid)
                    # ABM config
                    abm_config = '{0}/{1}_alba.cfg'.format(configuration_dir, vpool.name)
                    if rem.os.path.exists(abm_config):
                        with rem.open(abm_config) as config_file:
                            EtcdConfiguration.set(config_tree.format('abm'), config_file.read(), raw=True)
                        rem.os.remove(abm_config)
                    # Albaproxy config
                    alba_config = '{0}/{1}_alba.json'.format(configuration_dir, vpool.name)
                    if rem.os.path.exists(alba_config):
                        with rem.open(alba_config) as config_file:
                            config = rem.json.load(config_file)
                        del config['albamgr_cfg_file']
                        config['albamgr_cfg_url'] = 'etcd://127.0.0.1:2379{0}'.format(config_tree.format('abm'))
                        EtcdConfiguration.set(config_tree.format('main'), json.dumps(config, indent=4), raw=True)
                        params = {'VPOOL_NAME': vpool.name,
                                  'VPOOL_GUID': vpool.guid,
                                  'PROXY_ID': storagedriver.alba_proxy.guid}
                        alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name)
                        ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                        rem.os.remove(alba_config)

                # Volumedriver config
                current_file = '{0}/{1}.json'.format(configuration_dir, vpool.name)
                if rem.os.path.exists(current_file):
                    readcache_size = 0
                    with rem.open(current_file) as config_file:
                        config = rem.json.load(config_file)
                    config['distributed_transaction_log'] = {}
                    config['distributed_transaction_log']['dtl_transport'] = config['failovercache']['failovercache_transport']
                    config['distributed_transaction_log']['dtl_path'] = config['failovercache']['failovercache_path']
                    config['volume_manager']['dtl_throttle_usecs'] = config['volume_manager']['foc_throttle_usecs']
                    del config['failovercache']
                    del config['volume_manager']['foc_throttle_usecs']
                    sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
                    sdc.configuration = config
                    sdc.save(reload_config=False)
                    for mountpoint in config['content_addressed_cache']['clustercache_mount_points']:
                        readcache_size += int(mountpoint['size'].replace('KiB', ''))
                    params = {'VPOOL_MOUNTPOINT': storagedriver.mountpoint,
                              'HYPERVISOR_TYPE': storagedriver.storagerouter.pmachine.hvtype,
                              'VPOOL_NAME': vpool.name,
                              'CONFIG_PATH': sdc.remote_path,
                              'UUID': str(uuid.uuid4()),
                              'OVS_UID': client.run('id -u ovs').strip(),
                              'OVS_GID': client.run('id -g ovs').strip(),
                              'KILL_TIMEOUT': str(int(readcache_size / 1024.0 / 1024.0 / 6.0 + 30))}
                    vmware_mode = EtcdConfiguration.get('/ovs/framework/hosts/{0}/storagedriver|vmware_mode'.format(host_id))
                    dtl_service = 'ovs-dtl_{0}'.format(vpool.name)
                    ServiceManager.add_service(name='ovs-dtl', params=params, client=client, target_name=dtl_service)
                    if vpool.backend_type.code == 'alba':
                        alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name)
                        dependencies = [alba_proxy_service]
                    else:
                        dependencies = None
                    if vmware_mode == 'ganesha':
                        template_name = 'ovs-ganesha'
                    else:
                        template_name = 'ovs-volumedriver'
                    voldrv_service = 'ovs-volumedriver_{0}'.format(vpool.name)
                    ServiceManager.add_service(name=template_name, params=params, client=client, target_name=voldrv_service, additional_dependencies=dependencies)
                    rem.os.remove(current_file)

                # Ganesha config, if available
                current_file = '{0}/{1}_ganesha.conf'.format(configuration_dir, vpool.name)
                if rem.os.path.exists(current_file):
                    sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
                    contents = ''
                    for template in ['ganesha-core', 'ganesha-export']:
                        contents += client.file_read('/opt/OpenvStorage/config/templates/{0}.conf'.format(template))
                    params = {'VPOOL_NAME': vpool.name,
                              'VPOOL_MOUNTPOINT': '/mnt/{0}'.format(vpool.name),
                              'CONFIG_PATH': sdc.remote_path,
                              'NFS_FILESYSTEM_ID': storagedriver.storagerouter.ip.split('.', 2)[-1]}
                    for key, value in params.iteritems():
                        contents = contents.replace('<{0}>'.format(key), value)
                    client.file_write(current_file, contents)

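# Hedged sketch of the volumedriver migration above as a pure function:
# 'failovercache' settings move under 'distributed_transaction_log' and
# 'foc_throttle_usecs' is renamed to 'dtl_throttle_usecs'.
def migrate_failovercache(config):
    foc = config.pop('failovercache')
    config['distributed_transaction_log'] = {'dtl_transport': foc['failovercache_transport'],
                                             'dtl_path': foc['failovercache_path']}
    config['volume_manager']['dtl_throttle_usecs'] = config['volume_manager'].pop('foc_throttle_usecs')
    return config

migrated = migrate_failovercache({'failovercache': {'failovercache_transport': 'TCP',
                                                    'failovercache_path': '/mnt/dtl'},
                                  'volume_manager': {'foc_throttle_usecs': 4000}})
assert migrated['distributed_transaction_log']['dtl_transport'] == 'TCP'
assert migrated['volume_manager'] == {'dtl_throttle_usecs': 4000}
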
def extend_cluster(master_ip, new_ip, cluster_name, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
    """
    Extends a cluster to a given new node
    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param server_port: Port to be used by server
    :type server_port: int
    :param client_port: Port to be used by client
    :type client_port: int
    """
    EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

    master_client = SSHClient(master_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, master_client, client_port=client_port):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

    command = 'etcdctl member list'
    new_server_url = EtcdInstaller.SERVER_URL.format(new_ip, server_port)
    if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT:
        command = 'etcdctl --peers={0}:{1} member list'.format(master_ip, client_port)
    cluster_members = master_client.run(command).splitlines()
    for cluster_member in cluster_members:
        if new_server_url in cluster_member:
            EtcdInstaller._logger.info('Node {0} already member of etcd cluster'.format(new_ip))
            return

    current_cluster = []
    for item in cluster_members:
        info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
        current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

    new_client = SSHClient(new_ip, username='******')
    node_name = System.get_my_machine_id(new_client)
    current_cluster.append('{0}={1}'.format(node_name, new_server_url))

    data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
    abs_paths = [data_dir, wal_dir]
    new_client.dir_delete(abs_paths)
    new_client.dir_create(abs_paths)
    new_client.dir_chmod(abs_paths, 0755, recursive=True)
    new_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, new_client)  # Stop a possible proxy service
    ServiceManager.add_service(base_name, new_client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': new_server_url,
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip, client_port),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                       'INITIAL_CLUSTER': ','.join(current_cluster),
                                       'INITIAL_STATE': 'existing',
                                       'INITIAL_PEERS': ''},
                               target_name=target_name)

    add_command = 'etcdctl member add {0} {1}'.format(node_name, new_server_url)
    if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT:
        add_command = 'etcdctl --peers={0}:{1} member add {2} {3}'.format(master_ip, client_port, node_name, new_server_url)
    master_client.run(add_command)
    EtcdInstaller.start(cluster_name, new_client)
    EtcdInstaller.wait_for_cluster(cluster_name, new_client, client_port=client_port)

    EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))

def extend_cluster(master_ip, new_ip, cluster_name, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
    """
    Extends a cluster to a given new node
    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param server_port: Port to be used by server
    :type server_port: int
    :param client_port: Port to be used by client
    :type client_port: int
    """
    EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

    master_client = SSHClient(master_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, master_client, client_port=client_port):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

    command = ['etcdctl', 'member', 'list']
    new_server_url = EtcdInstaller.SERVER_URL.format(new_ip, server_port)
    if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT:
        command = ['etcdctl', '--peers={0}:{1}'.format(master_ip, client_port), 'member', 'list']
    cluster_members = master_client.run(command).splitlines()
    for cluster_member in cluster_members:
        if new_server_url in cluster_member:
            EtcdInstaller._logger.info('Node {0} already member of etcd cluster'.format(new_ip))
            return

    current_cluster = []
    for item in cluster_members:
        info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
        current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

    new_client = SSHClient(new_ip, username='******')
    node_name = System.get_my_machine_id(new_client)
    current_cluster.append('{0}={1}'.format(node_name, new_server_url))

    data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
    abs_paths = [data_dir, wal_dir]
    new_client.dir_delete(abs_paths)
    new_client.dir_create(abs_paths)
    new_client.dir_chmod(abs_paths, 0755, recursive=True)
    new_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, new_client)  # Stop a possible proxy service
    params = ServiceManager.add_service(name=base_name,
                                        client=new_client,
                                        target_name=target_name,
                                        delay_registration=True,
                                        params={'CLUSTER': cluster_name,
                                                'NODE_ID': node_name,
                                                'DATA_DIR': data_dir,
                                                'WAL_DIR': wal_dir,
                                                'SERVER_URL': new_server_url,
                                                'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip, client_port),
                                                'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                                'INITIAL_CLUSTER': ','.join(current_cluster),
                                                'INITIAL_STATE': 'existing',
                                                'INITIAL_PEERS': ''})

    add_command = ['etcdctl', 'member', 'add', node_name, new_server_url]
    if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT:
        add_command = ['etcdctl', '--peers={0}:{1}'.format(master_ip, client_port), 'member', 'add', node_name, new_server_url]
    master_client.run(add_command)
    EtcdInstaller.start(cluster_name, new_client)
    EtcdInstaller.wait_for_cluster(cluster_name, new_client, client_port=client_port)
    ServiceManager.register_service(service_metadata=params, node_name=node_name)

    EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))

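# Hedged sketch of the command construction above: etcdctl is addressed via
# '--peers' only when a non-default client port is in use. The default port
# value here is an assumption.
def etcdctl_cmd(args, master_ip, client_port, default_client_port=2379):
    command = ['etcdctl']
    if client_port != default_client_port:
        command.append('--peers={0}:{1}'.format(master_ip, client_port))
    return command + args

assert etcdctl_cmd(['member', 'list'], '10.0.0.1', 2379) == ['etcdctl', 'member', 'list']
assert etcdctl_cmd(['member', 'list'], '10.0.0.1', 2381) == ['etcdctl', '--peers=10.0.0.1:2381', 'member', 'list']
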
def configure_host(self, ip):
    if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False:
        self._logger.warning('Configure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed')
        return

    # 1. Get Driver code
    self._logger.info('*** Configuring host with IP {0} ***'.format(ip))
    self._logger.info(' Copy driver code')
    remote_driver = "/opt/OpenvStorage/config/templates/cinder-volume-driver/{0}/openvstorage.py".format(self._stack_version)
    remote_version = '0.0.0'
    existing_version = '0.0.0'
    try:
        from cinder.volume.drivers import openvstorage
        if hasattr(openvstorage, 'OVSVolumeDriver'):
            existing_version = getattr(openvstorage.OVSVolumeDriver, 'VERSION', '0.0.0')
    except ImportError:
        pass

    for line in self.client.file_read(remote_driver).splitlines():
        if 'VERSION = ' in line:
            remote_version = line.split('VERSION = ')[-1].strip().replace("'", "").replace('"', "")
            break

    nova_base_path = self._get_base_path('nova')
    cinder_base_path = self._get_base_path('cinder')
    if self._is_devstack is True:
        local_driver = '{0}/volume/drivers/openvstorage.py'.format(cinder_base_path)
    else:
        local_driver = '{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location)

    if remote_version > existing_version:
        self._logger.debug('Updating existing driver using {0} from version {1} to version {2}'.format(remote_driver, existing_version, remote_version))
        self.client.run('cp -f {0} {1}'.format(remote_driver, local_driver))
    else:
        self._logger.debug('Using driver {0} version {1}'.format(local_driver, existing_version))

    # 2. Configure users and groups
    self._logger.info(' Add users to group ovs')
    users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users
    for user in users:
        self.client.run('usermod -a -G ovs {0}'.format(user))

    # 3. Apply patches
    self._logger.info(' Applying patches')
    if self._stack_version in ('liberty', 'mitaka', 'newton'):
        try:
            import os_brick
            cinder_brick_initiator_file = "{0}/initiator/connector.py".format(os.path.dirname(os_brick.__file__))
        except ImportError:
            cinder_brick_initiator_file = ''
        if self._is_devstack is True:
            nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format(nova_base_path)
        else:
            nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format(self._driver_location)
    else:
        cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(self._driver_location)
        if self._is_devstack is True:
            nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path)
        else:
            nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location)
    if self._is_devstack is True:
        nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path)
    else:
        nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location)

    self._logger.info(' Patching file {0}'.format(nova_volume_file))
    file_contents = self.client.file_read(nova_volume_file)
    if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents:
        file_contents += '''
class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):
    def __init__(self, connection):
        super(LibvirtFileVolumeDriver, self).__init__(connection, is_block_dev=False)

    def get_config(self, connection_info, disk_info):
        conf = super(LibvirtFileVolumeDriver, self).get_config(connection_info, disk_info)
        conf.source_type = 'file'
        conf.source_path = connection_info['data']['device_path']
        return conf
'''
        self.client.file_write(nova_volume_file, file_contents)

    self._logger.info(' Patching file {0}'.format(nova_driver_file))
    file_contents = self.client.file_read(nova_driver_file)
    if self._stack_version in ('liberty', 'mitaka'):
        check_line = 'local=nova.virt.libvirt.volume.volume.LibvirtVolumeDriver'
        new_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver'
    else:
        check_line = 'local=nova.virt.libvirt.volume.LibvirtVolumeDriver'
        new_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'
    if new_line not in file_contents:
        for line in file_contents.splitlines():
            if check_line in line:
                stripped_line = line.rstrip()
                whitespaces = len(stripped_line) - len(stripped_line.lstrip())
                new_line = "{0}'{1}',\n".format(' ' * whitespaces, new_line)
                fc = file_contents[:file_contents.index(line)] + new_line + file_contents[file_contents.index(line):]
                self.client.file_write(nova_driver_file, "".join(fc))
                break

    if os.path.exists(cinder_brick_initiator_file):
        # fix brick/upload to glance
        self._logger.info(' Patching file {0}'.format(cinder_brick_initiator_file))
        if self._stack_version in ('liberty', 'mitaka', 'newton'):
            self.client.run("""sed -i 's/elif protocol == LOCAL:/elif protocol in [LOCAL, "FILE"]:/g' {0}""".format(cinder_brick_initiator_file))
        else:
            self.client.run("""sed -i 's/elif protocol == "LOCAL":/elif protocol in ["LOCAL", "FILE"]:/g' {0}""".format(cinder_brick_initiator_file))

    # 4. Configure messaging driver
    self._logger.info(' - Configure messaging driver')
    nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
    cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
    with remote(ip, [RawConfigParser, open], 'root') as rem:
        for config_file, driver in {self._NOVA_CONF: nova_messaging_driver,
                                    self._CINDER_CONF: cinder_messaging_driver}.iteritems():
            changed = False
            cfg = rem.RawConfigParser()
            cfg.read([config_file])
            if cfg.has_option("DEFAULT", "notification_driver"):
                if cfg.get("DEFAULT", "notification_driver") != driver:
                    changed = True
                    cfg.set("DEFAULT", "notification_driver", driver)
            else:
                changed = True
                cfg.set("DEFAULT", "notification_driver", driver)
            if cfg.has_option("DEFAULT", "notification_topics"):
                notification_topics = cfg.get("DEFAULT", "notification_topics").split(",")
                if "notifications" not in notification_topics:
                    notification_topics.append("notifications")
                    changed = True
                    cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics))
            else:
                changed = True
                cfg.set("DEFAULT", "notification_topics", "notifications")
            if config_file == self._NOVA_CONF:
                for param, value in {'notify_on_any_change': 'True',
                                     'notify_on_state_change': 'vm_and_task_state'}.iteritems():
                    if not cfg.has_option("DEFAULT", param):
                        changed = True
                        cfg.set("DEFAULT", param, value)
            if changed is True:
                with rem.open(config_file, "w") as fp:
                    cfg.write(fp)

    # 5. Enable events consumer
    self._logger.info(' - Enabling events consumer service')
    service_name = 'openstack-events-consumer'
    if not ServiceManager.has_service(service_name, self.client):
        ServiceManager.add_service(service_name, self.client)
        ServiceManager.enable_service(service_name, self.client)
        ServiceManager.start_service(service_name, self.client)
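One subtlety in the driver-copy step above: `remote_version > existing_version` compares the version strings lexically, so a version like '0.10.0' would sort before '0.9.0' and the update would be skipped. A minimal sketch of a numeric-aware comparison using the Python 2 standard library's LooseVersion; the _should_update helper is hypothetical and not part of the original installer:

# Sketch: numeric-aware version comparison for the driver-update check.
# LooseVersion ships with the Python 2 standard library; _should_update
# is a hypothetical helper, not part of the original code.
from distutils.version import LooseVersion

def _should_update(remote_version, existing_version):
    # Compares version components numerically, so '0.10.0' > '0.9.0'
    # holds, unlike the plain string comparison used above.
    return LooseVersion(remote_version) > LooseVersion(existing_version)

assert _should_update('0.10.0', '0.9.0')
assert not ('0.10.0' > '0.9.0')  # the lexical comparison gets this wrong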
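Step 4 of configure_host follows a read-modify-write pattern: parse the config file, set only the options whose values differ, and rewrite the file only when something actually changed. A standalone sketch of that pattern, assuming a local file instead of the remote() wrapper used above; ensure_option and the path are illustrative only:

# Sketch: idempotent edit of an OpenStack-style config file with the
# Python 2 RawConfigParser. Path and values are illustrative; the
# original code performs the same steps through a remote() wrapper.
from ConfigParser import RawConfigParser

def ensure_option(config_file, section, option, value):
    cfg = RawConfigParser()
    cfg.read([config_file])  # a missing file is silently skipped
    if cfg.has_option(section, option) and cfg.get(section, option) == value:
        return False  # already correct: leave the file untouched
    cfg.set(section, option, value)
    with open(config_file, 'w') as fp:
        cfg.write(fp)
    return True

ensure_option('/tmp/nova.conf', 'DEFAULT', 'notification_driver', 'messaging')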
def execute_scrub_work(queue, vpool, scrub_info, error_messages):
    """
    Executes scrub work for a given vDisk queue and vPool, based on scrub_info
    :param queue: a Queue with vDisk guids that need to be scrubbed (they should all be members of a single vPool)
    :type queue: Queue
    :param vpool: the vPool object of the vDisks
    :type vpool: VPool
    :param scrub_info: A dict containing scrub information:
                       `scrub_path` with the path to scrub in
                       `storage_router` with the StorageRouter that needs to do the work
    :type scrub_info: dict
    :param error_messages: A list of error messages to be filled
    :type error_messages: list
    :return: a list of error messages
    :rtype: list
    """
    def _verify_mds_config(current_vdisk):
        current_vdisk.invalidate_dynamics('info')
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    client = None
    lock_time = 5 * 60
    storagerouter = scrub_info['storage_router']
    scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
    scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
    backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
    alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

    # Deploy a proxy
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_create(scrub_directory)
            client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
            if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                scrub_config = Configuration.get(scrub_config_key)
            else:
                machine_id = System.get_my_machine_id(client)
                port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                # Scrub config
                # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                #  u'fragment_cache': [u'none'],
                #  u'ips': [u'127.0.0.1'],
                #  u'log_level': u'info',
                #  u'manifest_cache_size': 17179869184,
                #  u'port': 0,
                #  u'transport': u'tcp'}

                # Backend config
                # {u'alba_connection_host': u'10.100.193.155',
                #  u'alba_connection_port': 26204,
                #  u'alba_connection_preset': u'preset',
                #  u'alba_connection_timeout': 15,
                #  u'alba_connection_transport': u'TCP',
                #  u'backend_interface_retries_on_error': 5,
                #  u'backend_interface_retry_backoff_multiplier': 2.0,
                #  u'backend_interface_retry_interval_secs': 1,
                #  u'backend_type': u'ALBA'}
                scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                scrub_config['port'] = port
                scrub_config['transport'] = 'tcp'
                Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                params = {'VPOOL_NAME': vpool.name,
                          'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                          'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                ServiceManager.start_service(name=alba_proxy_service, client=client)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

            backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
            backend_config['alba_connection_host'] = '127.0.0.1'
            backend_config['alba_connection_port'] = scrub_config['port']
            Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
        if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
            if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ServiceManager.stop_service(name=alba_proxy_service, client=client)
            ServiceManager.remove_service(name=alba_proxy_service, client=client)
        if Configuration.exists(scrub_config_key):
            Configuration.delete(scrub_config_key)

    try:
        # Empty the queue with vDisks to scrub
        with remote(storagerouter.ip, [VDisk]) as rem:
            while True:
                vdisk = None
                vdisk_guid = queue.get(False)
                try:
                    # Check MDS master is local. Trigger MDS handover if necessary
                    vdisk = rem.VDisk(vdisk_guid)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                        MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                            continue

                    # Do the actual scrubbing
                    with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                        work_units = locked_client.get_scrubbing_workunits()
                        for work_unit in work_units:
                            res = locked_client.scrub(work_unit=work_unit,
                                                      scratch_dir=scrub_directory,
                                                      log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                      backend_config=Configuration.get_configuration_path(backend_config_key))
                            locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                        if work_units:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                        else:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                except Exception:
                    if vdisk is None:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                    else:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                    error_messages.append(message)
                    ScheduledTaskController._logger.exception(message)
    except Empty:  # Raised when all items have been fetched from the queue
        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)

    # Delete the proxy again
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_delete(scrub_directory)
            if ServiceManager.has_service(alba_proxy_service, client=client):
                ServiceManager.stop_service(alba_proxy_service, client=client)
                ServiceManager.remove_service(alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
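The scrub loop above terminates through an exception: queue.get(False) is non-blocking and raises Empty once the queue is drained, which is what ends the while-True loop, while per-vDisk failures are appended to error_messages so one bad vDisk does not abort the whole run. A self-contained sketch of that drain pattern; process and the item names are placeholders for the real per-item work:

# Sketch: draining a queue the way the scrub loop does. Empty ends the
# while-True loop; per-item failures are collected in an error list
# instead of aborting the run. Python 2 stdlib only.
from Queue import Queue, Empty

def process(item):
    return 'processed {0}'.format(item)  # placeholder for the real per-item work

def drain(queue, errors):
    try:
        while True:
            item = queue.get(False)  # non-blocking; raises Empty once drained
            try:
                process(item)
            except Exception:
                errors.append('processing {0} failed'.format(item))
    except Empty:
        pass  # normal termination: every item has been fetched

q = Queue()
for guid in ['vdisk-1', 'vdisk-2']:
    q.put(guid)
errors = []
drain(q, errors)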
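execute_scrub_work receives a shared queue and a shared error_messages list, which suggests a caller that fans the same queue out to one worker per StorageRouter and aggregates the failures afterwards. That caller is not part of this listing; the sketch below is an assumption built on stdlib threads, with run_scrub and its arguments purely illustrative:

# Sketch: a hypothetical caller fanning one vDisk queue out to several
# scrub workers. The controller that invokes execute_scrub_work is not
# shown in this listing, so everything below is an assumption.
from threading import Thread
from Queue import Queue

def run_scrub(vpool, scrub_infos, vdisk_guids):
    queue = Queue()
    for vdisk_guid in vdisk_guids:
        queue.put(vdisk_guid)
    error_messages = []  # shared across workers; list.append is atomic in CPython
    threads = []
    for scrub_info in scrub_infos:  # one worker per StorageRouter
        thread = Thread(target=execute_scrub_work,
                        args=(queue, vpool, scrub_info, error_messages))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()
    if error_messages:
        raise Exception('Errors occurred while scrubbing:\n - {0}'.format('\n - '.join(error_messages)))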