def create_cluster(cluster_name, ip, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
    """
    Creates a new etcd cluster with the given node as its only member
    When the etcd service for this cluster is already present and running on the node, nothing is changed
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param ip: IP address of the first node of the new cluster
    :type ip: str
    :param server_port: Port to be used by server
    :type server_port: int
    :param client_port: Port to be used by client
    :type client_port: int
    :return: None
    """
    logger = EtcdInstaller._logger
    logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))

    client = SSHClient(ip, username='******')
    service_name = 'ovs-etcd-{0}'.format(cluster_name)

    # Nothing to do when the service exists and reports itself as running
    if ServiceManager.has_service(service_name, client):
        if ServiceManager.get_service_status(service_name, client)[0] is True:
            logger.info('Service {0} already configured and running'.format(service_name))
            return

    machine_id = System.get_my_machine_id(client)
    data_directory = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_directory = EtcdInstaller.WAL_DIR.format(cluster_name)

    # Recreate the data and WAL directories: delete, create, then set mode and ownership
    directories = [data_directory, wal_directory]
    client.dir_delete(directories)
    client.dir_create(directories)
    client.dir_chmod(directories, 0o755, recursive=True)
    client.dir_chown(directories, 'ovs', 'ovs', recursive=True)

    # The server URL of this node is used for the service itself, the initial cluster and the advertised peers
    server_url = EtcdInstaller.SERVER_URL.format(ip, server_port)
    service_params = {
        'CLUSTER': cluster_name,
        'NODE_ID': machine_id,
        'DATA_DIR': data_directory,
        'WAL_DIR': wal_directory,
        'SERVER_URL': server_url,
        'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip, client_port),
        'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
        'INITIAL_CLUSTER': '{0}={1}'.format(machine_id, server_url),
        'INITIAL_STATE': 'new',
        'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(server_url),
    }
    service_metadata = ServiceManager.add_service(name='ovs-etcd',
                                                  client=client,
                                                  target_name=service_name,
                                                  delay_registration=True,
                                                  params=service_params)

    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client, client_port=client_port)

    logger.debug('Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))
    # Registration was delayed when adding the service; it is done once the cluster is up
    ServiceManager.register_service(service_metadata=service_metadata, node_name=machine_id)
def extend_cluster(master_ip, new_ip, cluster_name, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
    """
    Extends a cluster to a given new node
    When the new node's server URL already appears in the member list, nothing is changed
    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param server_port: Port to be used by server
    :type server_port: int
    :param client_port: Port to be used by client
    :type client_port: int
    :return: None
    :raises RuntimeError: When the cluster is unhealthy, or when a line of the 'etcdctl member list' output
                          cannot be parsed
    """
    EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

    master_client = SSHClient(master_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, master_client, client_port=client_port):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

    # An explicit --peers endpoint is only passed when a non-default client port is in use
    etcdctl = ['etcdctl']
    if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT:
        etcdctl.append('--peers={0}:{1}'.format(master_ip, client_port))

    new_server_url = EtcdInstaller.SERVER_URL.format(new_ip, server_port)
    cluster_members = master_client.run(etcdctl + ['member', 'list']).splitlines()
    for cluster_member in cluster_members:
        if new_server_url in cluster_member:
            EtcdInstaller._logger.info('Node {0} already member of etcd cluster'.format(new_ip))
            return

    # Build the 'name=peer' list of the current members. This happens before anything is changed on
    # the new node, so a parsing problem aborts the extend without side effects.
    current_cluster = []
    for cluster_member in cluster_members:
        if not cluster_member.strip():
            continue  # Blank lines do not describe a member
        match = re.search(EtcdInstaller.MEMBER_REGEX, cluster_member)
        if match is None:
            # re.search returns None on a non-matching line; fail with a clear message
            # instead of an AttributeError on None
            raise RuntimeError('Could not parse etcd member "{0}" of cluster "{1}", aborting extend'.format(cluster_member, cluster_name))
        info = match.groupdict()
        current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

    new_client = SSHClient(new_ip, username='******')
    node_name = System.get_my_machine_id(new_client)
    current_cluster.append('{0}={1}'.format(node_name, new_server_url))

    # Recreate the data and WAL directories on the new node: delete, create, then set mode and ownership
    data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
    abs_paths = [data_dir, wal_dir]
    new_client.dir_delete(abs_paths)
    new_client.dir_create(abs_paths)
    new_client.dir_chmod(abs_paths, 0o755, recursive=True)
    new_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, new_client)  # Stop a possible proxy service
    params = ServiceManager.add_service(name=base_name,
                                        client=new_client,
                                        target_name=target_name,
                                        delay_registration=True,
                                        params={'CLUSTER': cluster_name,
                                                'NODE_ID': node_name,
                                                'DATA_DIR': data_dir,
                                                'WAL_DIR': wal_dir,
                                                'SERVER_URL': new_server_url,
                                                'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip, client_port),
                                                'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                                'INITIAL_CLUSTER': ','.join(current_cluster),
                                                'INITIAL_STATE': 'existing',
                                                'INITIAL_PEERS': ''})

    # Announce the new member through the existing node first, then start the service on the new node
    master_client.run(etcdctl + ['member', 'add', node_name, new_server_url])
    EtcdInstaller.start(cluster_name, new_client)
    EtcdInstaller.wait_for_cluster(cluster_name, new_client, client_port=client_port)

    # Registration was delayed when adding the service; it is done once the cluster is up
    ServiceManager.register_service(service_metadata=params, node_name=node_name)
    EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node to master
    In order, this marks the StorageRouter as 'MASTER', joins the configuration cluster (Arakoon or Etcd) when
    no external configuration is set, joins the Arakoon OVS DB cluster when that cluster is internal, optionally
    configures memcached and RabbitMQ, updates the endpoint configuration, (re)starts services, runs the
    'nodetype'/'promote' hooks and finally flags the promotion as completed in the configuration
    :param cluster_ip: IP of the node being promoted; used as key in ip_client_map and as the new member's IP
    :type cluster_ip: str
    :param master_ip: IP of an existing master node; used as key in ip_client_map and to extend clusters from
    :type master_ip: str
    :param ip_client_map: Mapping from IP to client object (presumably SSHClient instances - verify against caller)
    :type ip_client_map: dict
    :param unique_id: Machine ID used to look up the StorageRouter that gets promoted
    :type unique_id: str
    :param configure_memcached: When True, memcached is validated, configured and added to the endpoints
    :type configure_memcached: bool
    :param configure_rabbitmq: When True, RabbitMQ is configured, joined to the master's cluster and added to the endpoints
    :type configure_rabbitmq: bool
    :return: None
    :raises RuntimeError: When memcached must be configured but not all memcache nodes can be reached, or when
                          no other master node is found in the Arakoon OVS DB cluster configuration
    """
    # Imported inside the function rather than at module level (presumably to avoid circular imports - confirm)
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service

    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    # Fail early, before anything is changed, when memcached cannot be validated
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    # Mark the StorageRouter as master in the model
    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    # The configuration cluster is only extended when no external configuration is set
    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        config_store = Configuration.get_store()
        if config_store == 'arakoon':
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
            metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                       new_ip=cluster_ip,
                                                       cluster_name='config',
                                                       base_dir=Configuration.get('/ovs/framework/paths|ovsdb'),
                                                       ports=[26400, 26401],
                                                       filesystem=True)
            ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                 current_ips=metadata['ips'],
                                                 new_ip=cluster_ip,
                                                 filesystem=True)
            ServiceManager.register_service(node_name=machine_id,
                                            service_metadata=metadata['service_metadata'])
        else:
            # Any store other than 'arakoon' is handled as Etcd
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
            EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    # The node being promoted does not count as an 'other' master
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    # Ports stay empty unless the OVS DB Arakoon cluster is internal and gets extended below
    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                 new_ip=cluster_ip,
                                                 cluster_name=arakoon_cluster_name,
                                                 base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=result['ips'],
                                             new_ip=cluster_ip,
                                             filesystem=False)
        arakoon_ports = [result['client_port'], result['messaging_port']]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    # Add this node to the memcache endpoints (port 11211) when not yet listed
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
        Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    # Add this node to the message queue endpoints (port 5672) when not yet listed
    if configure_rabbitmq is True:
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
        Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=master_node_ips,
                                             new_ip=cluster_ip,
                                             filesystem=False)
        # NOTE(review): presumably clears cached store clients so they get rebuilt against the
        # extended cluster - confirm against the factory implementations
        PersistentFactory.store = None
        VolatileFactory.store = None

        # Model an 'arakoon-ovsdb' service for this StorageRouter unless a matching one is already
        # found among the external services or the services of this node
        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie from the master node to the node being promoted
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

        Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        # NOTE(review): mode is the decimal literal 400, not an octal literal - confirm that
        # file_chmod expects the mode in this form before changing it
        target_client.file_chmod(rabbitmq_cookie_file, mode=400)
        # Start the broker detached, stop its app, join the master's cluster and stop the broker again.
        # The fixed 5 second pauses presumably give each step time to settle - confirm
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues, after starting rabbitmq-server as a service
        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)

    NodeTypeController._configure_amqp_to_volumedriver()

    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
    # 'arakoon-ovsdb' is not started here when the cluster is internal; restart_cluster_add was already called for it
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    # Only services that actually exist on the target node are started
    for service in services:
        if ServiceManager.has_service(service, client=target_client):
            Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    # A second restart is done only when run_hooks returns a truthy value
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    # Record the new node type and flag the promotion as completed in the configuration
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    # Remove the rollback marker file when present
    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')