def update_storagedrivers(storagedriver_guids, storagerouters, parameters):
    """
    Add/remove multiple vPools
    @param storagedriver_guids: Storage Drivers to be removed
    @param storagerouters: StorageRouters on which to add a new link
    @param parameters: Settings for new links
    @return: True when every add/remove succeeded, False if any step failed
    """
    success = True
    # Add Storage Drivers
    for storagerouter_ip, storagerouter_machineid in storagerouters:
        try:
            new_parameters = copy.copy(parameters)
            new_parameters['storagerouter_ip'] = storagerouter_ip
            local_machineid = System.get_my_machine_id()
            if local_machineid == storagerouter_machineid:
                # Inline execution, since it's on the same node (preventing deadlocks)
                StorageRouterController.add_vpool(new_parameters)
            else:
                # Async execution, since it has to be executed on another node
                # @TODO: Will break in Celery 3.2, need to find another solution
                # Requirements:
                # - This code cannot continue until this new task is completed (as all these Storage Routers
                #   need to be handled sequentially)
                # - The wait() or get() method are not allowed anymore from within a task to prevent deadlocks
                result = StorageRouterController.add_vpool.s(new_parameters).apply_async(
                    routing_key='sr.{0}'.format(storagerouter_machineid))
                result.wait()
        except Exception:
            # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt still propagate
            success = False
    # Remove Storage Drivers
    for storagedriver_guid in storagedriver_guids:
        try:
            storagedriver = StorageDriver(storagedriver_guid)
            storagerouter_machineid = storagedriver.storagerouter.machine_id
            local_machineid = System.get_my_machine_id()
            if local_machineid == storagerouter_machineid:
                # Inline execution, since it's on the same node (preventing deadlocks)
                StorageRouterController.remove_storagedriver(storagedriver_guid)
            else:
                # Async execution, since it has to be executed on another node
                # @TODO: Will break in Celery 3.2, need to find another solution
                # Requirements:
                # - This code cannot continue until this new task is completed (as all these VSAs need to be
                #   handled sequentially)
                # - The wait() or get() method are not allowed anymore from within a task to prevent deadlocks
                result = StorageRouterController.remove_storagedriver.s(storagedriver_guid).apply_async(
                    routing_key='sr.{0}'.format(storagerouter_machineid))
                result.wait()
        except Exception:
            # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt still propagate
            success = False
    return success
def create_cluster(cluster_name, ip, exclude_ports, plugins=None):
    """
    Create a new arakoon cluster on the given node.

    The arakoon base location and the allowed port range are read from the
    node's own configuration; two free ports are claimed for the client and
    messaging endpoints.
    """
    logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
    node_client = SSHClient(ip)
    root_dir = node_client.config_read('ovs.arakoon.location').rstrip('/')
    allowed_range = node_client.config_read('ovs.ports.arakoon')
    free_ports = System.get_free_ports(allowed_range, exclude_ports, 2, node_client)
    this_node = System.get_my_machine_id(node_client)
    cluster_config = ArakoonClusterConfig(cluster_name, plugins)
    if not any(existing.name == this_node for existing in cluster_config.nodes):
        cluster_config.nodes.append(ArakoonNodeConfig(name=this_node,
                                                      ip=ip,
                                                      client_port=free_ports[0],
                                                      messaging_port=free_ports[1],
                                                      log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                                      home=ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name),
                                                      tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)))
    ArakoonInstaller._deploy(cluster_config)
    logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
    return {'client_port': free_ports[0], 'messaging_port': free_ports[1]}
def extend_cluster(master_ip, new_ip, cluster_name, exclude_ports):
    """
    Join the node at new_ip to an existing arakoon cluster.

    The current cluster configuration is loaded via the master node, after
    which free ports and directories are resolved on the new node.
    """
    logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    master_client = SSHClient(master_ip)
    cluster_config = ArakoonClusterConfig(cluster_name)
    cluster_config.load_config(master_client)
    new_client = SSHClient(new_ip)
    root_dir = new_client.config_read('ovs.arakoon.location').rstrip('/')
    allowed_range = new_client.config_read('ovs.ports.arakoon')
    free_ports = System.get_free_ports(allowed_range, exclude_ports, 2, new_client)
    this_node = System.get_my_machine_id(new_client)
    if not any(existing.name == this_node for existing in cluster_config.nodes):
        cluster_config.nodes.append(ArakoonNodeConfig(name=this_node,
                                                      ip=new_ip,
                                                      client_port=free_ports[0],
                                                      messaging_port=free_ports[1],
                                                      log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                                      home=ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name),
                                                      tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)))
    ArakoonInstaller._deploy(cluster_config)
    logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': free_ports[0], 'messaging_port': free_ports[1]}
def extend_cluster(master_ip, new_ip, cluster_name, exclude_ports, base_dir):
    """
    Join the node at new_ip to an existing arakoon cluster, placing its data
    under the supplied base directory.
    """
    logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    master_client = SSHClient(master_ip)
    cluster_config = ArakoonClusterConfig(cluster_name)
    cluster_config.load_config(master_client)
    new_client = SSHClient(new_ip)
    root_dir = base_dir.rstrip('/')
    allowed_range = new_client.config_read('ovs.ports.arakoon')
    free_ports = System.get_free_ports(allowed_range, exclude_ports, 2, new_client)
    this_node = System.get_my_machine_id(new_client)
    if not any(existing.name == this_node for existing in cluster_config.nodes):
        cluster_config.nodes.append(ArakoonNodeConfig(name=this_node,
                                                      ip=new_ip,
                                                      client_port=free_ports[0],
                                                      messaging_port=free_ports[1],
                                                      log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                                      home=ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name),
                                                      tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)))
    ArakoonInstaller._deploy(cluster_config)
    logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': free_ports[0], 'messaging_port': free_ports[1]}
def pulse():
    """
    Update the heartbeats for all Storage Routers
    :return: None
    """
    logger = LogHandler.get('extensions', name='heartbeat')
    current_time = int(time.time())
    machine_id = System.get_my_machine_id()
    amqp = '{0}://{1}:{2}@{3}//'.format(EtcdConfiguration.get('/ovs/framework/messagequeue|protocol'),
                                        EtcdConfiguration.get('/ovs/framework/messagequeue|user'),
                                        EtcdConfiguration.get('/ovs/framework/messagequeue|password'),
                                        EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id)))
    celery_path = OSManager.get_path('celery')
    # Ping every celery worker; ANSI color codes are stripped so the 'OK' lines can be matched textually
    worker_states = check_output("{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp), shell=True)
    routers = StorageRouterList.get_storagerouters()
    for node in routers:
        if node.heartbeats is None:
            node.heartbeats = {}
        if 'celery@{0}: OK'.format(node.name) in worker_states:
            node.heartbeats['celery'] = current_time
        if node.machine_id == machine_id:
            node.heartbeats['process'] = current_time
        else:
            try:
                # Check timeout of other nodes and clear arp cache
                if node.heartbeats and 'process' in node.heartbeats:
                    if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                        # BUGFIX: quote-escape the host name for the shell, consistent with the
                        # other pulse() implementations, so names with special characters can't
                        # break or inject into the command line
                        check_output("/usr/sbin/arp -d '{0}'".format(node.name.replace(r"'", r"'\''")), shell=True)
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')
        node.save()
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    node_id = System.get_my_machine_id()
    version_key = '/ovs/framework/hosts/{0}/versions'.format(node_id)
    version_data = Configuration.get(version_key) if Configuration.exists(version_key) else {}

    # Discover migrator classes: every 'object'-based class defined in a .py module
    # inside the 'migration' directory next to this file
    migration_dir = '/'.join([os.path.dirname(__file__), 'migration'])
    migrators = []
    for entry in os.listdir(migration_dir):
        full_path = '/'.join([migration_dir, entry])
        if not os.path.isfile(full_path) or not entry.endswith('.py'):
            continue
        module_name = entry.replace('.py', '')
        module = imp.load_source(module_name, full_path)
        for _, candidate in inspect.getmembers(module):
            if not inspect.isclass(candidate):
                continue
            if candidate.__module__ != module_name:
                continue
            if 'object' not in [base.__name__ for base in candidate.__bases__]:
                continue
            migrators.append((candidate.identifier, candidate.migrate))

    # Run every migrator from its last recorded version and persist the highest version reached
    end_version = 0
    for identifier, migrate_method in migrators:
        start_version = version_data.get(identifier, 0)
        new_version = migrate_method(start_version, master_ips, extra_ips)
        if new_version > end_version:
            end_version = new_version
        version_data[identifier] = end_version
    Configuration.set(version_key, version_data)
def tick(self):
    """
    Runs one iteration of the scheduler. This is guarded with a distributed lock

    Only the node that holds the lock record in the persistent store executes
    the scheduled workload; every other node simply waits for the next interval.
    :return: Number of seconds until the next tick should run
    """
    self._logger.debug('DS executing tick')
    try:
        self._has_lock = False
        with self._mutex:
            # noinspection PyProtectedMember
            node_now = current_app._get_current_object().now()
            node_timestamp = time.mktime(node_now.timetuple())
            node_name = System.get_my_machine_id()
            try:
                lock = self._persistent.get(self._lock_name)
            except KeyNotFoundException:
                lock = None
            if lock is None:
                # There is no lock yet, so the lock is acquired
                self._has_lock = True
                self._logger.debug('DS there was no lock in tick')
            else:
                if lock['name'] == node_name:
                    # The current node holds the lock
                    self._logger.debug('DS keeps own lock')
                    self._has_lock = True
                elif node_timestamp - lock[
                        'timestamp'] > DistributedScheduler.TIMEOUT:
                    # The current lock is timed out, so the lock is stolen
                    self._logger.debug(
                        'DS last lock refresh is {0}s old'.format(
                            node_timestamp - lock['timestamp']))
                    self._logger.debug('DS stealing lock from {0}'.format(
                        lock['name']))
                    # Reload the schedule before taking over from the previous holder
                    self._load_schedule()
                    self._has_lock = True
                else:
                    # Another node holds a fresh lock
                    self._logger.debug('DS lock is not ours')
            if self._has_lock is True:
                # Refresh the lock's timestamp so other nodes won't steal it
                lock = {'name': node_name, 'timestamp': node_timestamp}
                self._logger.debug('DS refreshing lock')
                self._persistent.set(self._lock_name, lock)
        if self._has_lock is True:
            self._logger.debug('DS executing tick workload')
            remaining_times = []
            try:
                for entry in self.schedule.itervalues():
                    next_time_to_run = self.maybe_due(
                        entry, self.publisher)
                    if next_time_to_run:
                        remaining_times.append(next_time_to_run)
            except RuntimeError:
                # NOTE(review): presumably raised when the schedule is mutated while
                # iterating — the round is skipped and retried on the next tick; confirm
                pass
            self._logger.debug('DS executing tick workload - done')
            return min(remaining_times + [self.max_interval])
        else:
            return self.max_interval
    except Exception as ex:
        # Never let a tick failure kill the scheduler loop; wait a full interval instead
        self._logger.debug('DS got error during tick: {0}'.format(ex))
        return self.max_interval
def pulse():
    """
    Update the heartbeats for the Current Routers
    :return: None
    """
    logger = Logger('extensions-generic')
    local_machine_id = System.get_my_machine_id()
    now = int(time.time())
    for router in StorageRouterList.get_storagerouters():
        if router.machine_id == local_machine_id:
            # Persist our own 'process' heartbeat under a mutex and ping through celery
            with volatile_mutex('storagerouter_heartbeat_{0}'.format(
                    router.guid)):
                reloaded = StorageRouter(router.guid)
                reloaded.heartbeats['process'] = now
                reloaded.save()
            StorageRouterController.ping.s(
                router.guid, now).apply_async(
                    routing_key='sr.{0}'.format(local_machine_id))
            continue
        try:
            # check timeout of other nodes and clear arp cache
            heartbeats = router.heartbeats
            if heartbeats and 'process' in heartbeats:
                if now - heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                    check_output("/usr/sbin/arp -d '{0}'".format(
                        router.name.replace(r"'", r"'\''")), shell=True)
        except CalledProcessError:
            logger.exception('Error clearing ARP cache')
def _get_free_ports(client):
    """
    Find two free ports on the client's node for a new arakoon node.

    Ports already claimed by arakoon nodes of this machine (in any known
    cluster) are excluded from the search.
    :param client: SSH client to the node to inspect
    :return: List of two free port numbers
    """
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
        for cluster_name in EtcdConfiguration.list(ArakoonInstaller.ETCD_CONFIG_ROOT):
            try:
                config = ArakoonClusterConfig(cluster_name)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)
            except Exception:
                # Narrowed from a bare 'except:' — keep the best-effort scan of other
                # clusters, but let SystemExit/KeyboardInterrupt propagate
                logger.error(' Could not load port information of cluster {0}'.format(cluster_name))
    ports = System.get_free_ports(EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)),
                                  exclude_ports, 2, client)
    logger.debug(' Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports
def pulse():
    """
    Update the heartbeats for the Current Routers
    :return: None
    """
    logger = LogHandler.get('extensions', name='heartbeat')
    this_machine = System.get_my_machine_id()
    timestamp = int(time.time())
    for storagerouter in StorageRouterList.get_storagerouters():
        if storagerouter.machine_id == this_machine:
            # Record our own heartbeat under a mutex, then fire an async celery ping
            with volatile_mutex('storagerouter_heartbeat_{0}'.format(storagerouter.guid)):
                fresh_copy = StorageRouter(storagerouter.guid)
                fresh_copy.heartbeats['process'] = timestamp
                fresh_copy.save()
            StorageRouterController.ping.s(storagerouter.guid, timestamp).apply_async(routing_key='sr.{0}'.format(this_machine))
        else:
            try:
                # check timeout of other nodes and clear arp cache
                beats = storagerouter.heartbeats
                if beats and 'process' in beats and timestamp - beats['process'] >= HeartBeat.ARP_TIMEOUT:
                    check_output("/usr/sbin/arp -d '{0}'".format(storagerouter.name.replace(r"'", r"'\''")), shell=True)
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')
def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
    """
    Add the node at new_ip as an extra member of an existing arakoon cluster.

    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param base_dir: Base directory that will hold the db and tlogs
    :type base_dir: str
    :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
    :type locked: bool
    :return: Ports used by arakoon cluster
    :rtype: dict
    """
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    root_dir = base_dir.rstrip('/')
    cluster_config = ArakoonClusterConfig(cluster_name)
    cluster_config.load_config()
    new_node_client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
    new_node_name = System.get_my_machine_id(new_node_client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)
    # NOTE(review): the boolean per directory presumably steers how
    # clean_leftover_arakoon_data treats each path — confirm against its implementation
    ArakoonInstaller.clean_leftover_arakoon_data(new_ip, {log_dir: True,
                                                         home_dir: False,
                                                         tlog_dir: False})
    install_lock = None
    try:
        if locked is True:
            # Imported lazily so the unlocked path has no dependency on the mutex module
            from ovs.extensions.generic.volatilemutex import volatile_mutex
            install_lock = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
            install_lock.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(new_node_client)
        known_names = [node.name for node in cluster_config.nodes]
        if new_node_name not in known_names:
            cluster_config.nodes.append(ArakoonNodeConfig(name=new_node_name,
                                                          ip=new_ip,
                                                          client_port=ports[0],
                                                          messaging_port=ports[1],
                                                          log_dir=log_dir,
                                                          home=home_dir,
                                                          tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(cluster_config)
    finally:
        if install_lock is not None:
            install_lock.release()
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}
def add_services(client, node_type, logger):
    """
    Add the services required by the OVS cluster
    :param client: Client on which to add the services
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param node_type: Type of node ('master' or 'extra')
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :return: None
    """
    Toolbox.log(logger=logger, messages='Adding services')
    worker_queue = System.get_my_machine_id(client=client)
    if node_type == 'master':
        worker_queue += ',ovs_masters'
    services = {}
    if node_type == 'master':
        # Master-only services; memcached and rabbitmq each get their own params dict
        services['memcached'] = {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue}
        services['rabbitmq-server'] = {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue}
        services['scheduled-tasks'] = {}
        services['webapp-api'] = {}
        services['volumerouter-consumer'] = {}
    # Services every node type gets
    services['workers'] = {'WORKER_QUEUE': worker_queue}
    services['watcher-framework'] = {}
    for service_name, service_params in services.iteritems():
        if ServiceManager.has_service(service_name, client):
            continue
        Toolbox.log(logger=logger, messages='Adding service {0}'.format(service_name))
        ServiceManager.add_service(name=service_name, params=service_params, client=client)
def process_IN_MOVED_TO(self, event):
    """
    Handle an inotify IN_MOVED_TO event for a vMachine XML definition.

    When the file belongs to a vMachine on a vPool, copy it into the
    machine-specific directory on that vPool's mountpoint.
    :param event: pyinotify-style event with path, name and pathname attributes
    """
    try:
        self._logger.debug('path: {0} - name: {1} - moved to'.format(
            event.path, event.name))
        if self._is_run_watcher(event.path):
            self.invalidate_vmachine_status(event.name)
            return
        vpool_path = '/mnt/' + self.get_vpool_for_vm(event.pathname)
        if vpool_path == '/mnt/':
            self._logger.warning(
                'Vmachine not on vpool or invalid xml format for {0}'.
                format(event.pathname))
            # BUGFIX: without this return the code falls through, and since
            # '/mnt/' itself exists the file would be copied into '/mnt/<machine_id>/'
            return
        if os.path.exists(vpool_path):
            machine_id = System.get_my_machine_id()
            target_path = vpool_path + '/' + machine_id + '/'
            target_xml = target_path + event.name
            if not os.path.exists(target_path):
                os.mkdir(target_path)
            shutil.copy2(event.pathname, target_xml)
    except Exception as exception:
        # Watcher callbacks must never raise; log and carry on
        self._logger.error(
            'Exception during process_IN_MOVED_TO: {0}'.format(
                str(exception)), print_msg=True)
def create_cluster(cluster_name, ip, exclude_ports, base_dir, plugins=None):
    """
    Create a new arakoon cluster on the given node, rooted at base_dir.
    """
    logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
    node_client = SSHClient(ip)
    root_dir = base_dir.rstrip('/')
    allowed_range = node_client.config_read('ovs.ports.arakoon')
    free_ports = System.get_free_ports(allowed_range, exclude_ports, 2, node_client)
    this_node = System.get_my_machine_id(node_client)
    cluster_config = ArakoonClusterConfig(cluster_name, plugins)
    if not any(existing.name == this_node for existing in cluster_config.nodes):
        cluster_config.nodes.append(ArakoonNodeConfig(name=this_node,
                                                      ip=ip,
                                                      client_port=free_ports[0],
                                                      messaging_port=free_ports[1],
                                                      log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                                      home=ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name),
                                                      tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)))
    ArakoonInstaller._deploy(cluster_config)
    logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
    return {'client_port': free_ports[0], 'messaging_port': free_ports[1]}
def __init__(self): """ Initializes the client """ # Safe calls self._node_id = System.get_my_machine_id().replace(r"'", r"'\''") # Alba is currently always installed but the Alba version/package info is located in the SDM section self._package_manager = PackageFactory.get_manager() self._service_manager = ServiceFactory.get_manager() self._service_type = ServiceFactory.get_service_type() if self._service_type != 'systemd': raise NotImplementedError('Only Systemd is supported') # Potential failing calls self._cluster_id = self.get_config_key( self.LOCATION_CLUSTER_ID, fallback=[CONFIG_STORE_LOCATION, 'cluster_id']) self.interval = self.get_config_key( self.LOCATION_INTERVAL, fallback=[self.FALLBACK_CONFIG, self.KEY_INTERVAL], default=self.DEFAULT_INTERVAL) self._openvpn_service_name = 'openvpn@ovs_{0}-{1}'.format( self._cluster_id, self._node_id) # Calls to look out for. These could still be None when using them self._storagerouter = None self._client = None self._set_storagerouter() self._set_client() # Safe call, start caching self.caching = SupportAgentCache(self)
def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
    """
    Removes a node from a cluster, the old node will become a slave
    :param cluster_name: The name of the cluster to shrink
    :param deleted_node_ip: The ip of the node that should be deleted
    :param remaining_node_ip: The ip of a remaining node
    """
    logger.debug('Shrinking cluster "{0}" from {1}'.format(cluster_name, deleted_node_ip))
    survivor_client = SSHClient(remaining_node_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, survivor_client):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting shrink'.format(cluster_name))
    leaver_client = SSHClient(deleted_node_ip, username='******')
    leaver_name = System.get_my_machine_id(leaver_client)
    # Map the machine id of the leaving node to its etcd member id
    member_id = None
    for line in survivor_client.run('etcdctl member list').splitlines():
        parsed = re.search(EtcdInstaller.MEMBER_REGEX, line).groupdict()
        if parsed['name'] == leaver_name:
            member_id = parsed['id']
    if member_id is None:
        raise RuntimeError('Could not locate {0} in the cluster'.format(deleted_node_ip))
    survivor_client.run('etcdctl member remove {0}'.format(member_id))
    # Re-deploy the removed node as a proxy/slave of the remaining cluster
    EtcdInstaller.deploy_to_slave(remaining_node_ip, deleted_node_ip, cluster_name)
    EtcdInstaller.wait_for_cluster(cluster_name, survivor_client)
    logger.debug('Shrinking cluster "{0}" from {1} completed'.format(cluster_name, deleted_node_ip))
def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
    """
    Removes a node from a cluster, the old node will become a slave
    :param cluster_name: The name of the cluster to shrink
    :param deleted_node_ip: The ip of the node that should be deleted
    :param remaining_node_ip: The ip of a remaining node
    """
    logger.debug('Shrinking cluster "{0}" from {1}'.format(cluster_name, deleted_node_ip))
    remaining_client = SSHClient(remaining_node_ip, username='******')
    healthy = EtcdInstaller._is_healty(cluster_name, remaining_client)
    if not healthy:
        raise RuntimeError('Cluster "{0}" unhealthy, aborting shrink'.format(cluster_name))
    removed_client = SSHClient(deleted_node_ip, username='******')
    removed_name = System.get_my_machine_id(removed_client)
    # Resolve the etcd member id belonging to the node that leaves the cluster
    etcd_member_id = None
    member_listing = remaining_client.run('etcdctl member list')
    for entry in member_listing.splitlines():
        details = re.search(EtcdInstaller.MEMBER_REGEX, entry).groupdict()
        if details['name'] == removed_name:
            etcd_member_id = details['id']
    if etcd_member_id is None:
        raise RuntimeError('Could not locate {0} in the cluster'.format(deleted_node_ip))
    remaining_client.run('etcdctl member remove {0}'.format(etcd_member_id))
    # The removed node continues as a slave of the remaining cluster
    EtcdInstaller.deploy_to_slave(remaining_node_ip, deleted_node_ip, cluster_name)
    EtcdInstaller.wait_for_cluster(cluster_name, remaining_client)
    logger.debug('Shrinking cluster "{0}" from {1} completed'.format(cluster_name, deleted_node_ip))
def invalidate_vmachine_status(self, name):
    """Invalidate the cached dynamics of the vMachine backed by the given XML file."""
    if not name.endswith('.xml'):
        return
    device_name = '{0}/{1}'.format(System.get_my_machine_id(), name)
    machine = VMachineList().get_by_devicename_and_vpool(device_name, None)
    if not machine:
        return
    machine.invalidate_dynamics()
    logger.debug('Hypervisor status invalidated for: {0}'.format(name))
def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
    """Remove the node at deleted_node_ip from the given arakoon cluster and redistribute the config."""
    installer = ArakoonInstaller()
    installer.load_config_from(cluster_name, remaining_node_ip)
    removed_client = SSHClient.load(deleted_node_ip)
    removed_node_id = System.get_my_machine_id(removed_client)
    # Wipe the node's on-disk arakoon structure, drop it from the config, then publish the new config
    installer.delete_dir_structure(removed_client)
    installer.remove_node_from_config(removed_node_id)
    installer.upload_config_for(cluster_name)
def run_event_consumer():
    """
    Check whether to run the event consumer

    The consumer runs only when the rabbitmq client config contains a
    section for this machine's id.
    """
    config = RawConfigParser()
    config_path = os.path.join(Configuration.get('ovs.core.cfgdir'), 'rabbitmqclient.cfg')
    config.read(config_path)
    return config.has_section(System.get_my_machine_id())
def extend_cluster(master_ip, new_ip, cluster_name):
    """
    Extends a cluster to a given new node
    :param cluster_name: Name of the cluster to be extended
    :param new_ip: IP address of the node to be added
    :param master_ip: IP of one of the already existing nodes
    """
    logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    client = SSHClient(master_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, client):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))
    # If the new node's server URL already appears in the member list, there is nothing to do
    cluster_members = client.run('etcdctl member list').splitlines()
    for cluster_member in cluster_members:
        if EtcdInstaller.SERVER_URL.format(new_ip) in cluster_member:
            logger.info('Node {0} already member of etcd cluster'.format(new_ip))
            return
    # Build the 'name=peer_url' list of the current members; the new node is appended below
    current_cluster = []
    for item in client.run('etcdctl member list').splitlines():
        info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
        current_cluster.append('{0}={1}'.format(info['name'], info['peer']))
    client = SSHClient(new_ip, username='******')
    node_name = System.get_my_machine_id(client)
    current_cluster.append('{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
    # Start from empty, correctly-owned data/WAL directories on the new node
    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)
    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, client)  # Stop a possible proxy service
    # INITIAL_STATE 'existing' makes the new member join the running cluster
    # instead of bootstrapping a new one
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': ','.join(current_cluster),
                                       'INITIAL_STATE': 'existing',
                                       'INITIAL_PEERS': ''},
                               target_name=target_name)
    # Announce the new member on the master BEFORE starting it on the new node
    master_client = SSHClient(master_ip, username='******')
    master_client.run('etcdctl member add {0} {1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client)
    logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
    """
    Add the node at new_ip to an existing arakoon cluster.

    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param base_dir: Base directory that will hold the db and tlogs
    :type base_dir: str
    :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
    :type locked: bool
    :return: Ports used by arakoon cluster
    :rtype: dict
    """
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    root_dir = base_dir.rstrip('/')
    cluster_config = ArakoonClusterConfig(cluster_name)
    cluster_config.load_config()
    new_node_client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
    new_node_name = System.get_my_machine_id(new_node_client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)
    # Make sure no stale data of a previous deployment lingers on the new node
    ArakoonInstaller.clean_leftover_arakoon_data(new_ip, [home_dir, tlog_dir])
    install_lock = None
    try:
        if locked is True:
            # Imported lazily so the unlocked path has no dependency on the mutex module
            from ovs.extensions.generic.volatilemutex import volatile_mutex
            install_lock = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
            install_lock.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(new_node_client)
        if new_node_name not in [node.name for node in cluster_config.nodes]:
            cluster_config.nodes.append(ArakoonNodeConfig(name=new_node_name,
                                                          ip=new_ip,
                                                          client_port=ports[0],
                                                          messaging_port=ports[1],
                                                          log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                                          crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                                          home=home_dir,
                                                          tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(cluster_config)
    finally:
        if install_lock is not None:
            install_lock.release()
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}
def start(self, daemon=True):
    """
    start all nodes in the cluster
    """
    # Local import to avoid a circular dependency at module load time
    from ovs.extensions.db.arakoon.arakoon.CheckArakoonTlogMark import CheckArakoonTlogMark
    # Repair tlogs before starting; the node is stopped first if needed
    CheckArakoonTlogMark().fixtlogs(self._clusterName, always_stop=True)
    self._start_one_ex(System.get_my_machine_id(), daemon)
def start(self, daemon=True):
    """
    start all nodes in the cluster
    """
    # Local import to avoid a circular dependency at module load time
    from ovs.extensions.db.arakoon.CheckArakoonTlogMark import CheckArakoonTlogMark
    # Ensure tlogs are consistent (stopping the node if necessary) before starting
    CheckArakoonTlogMark().fixtlogs(self._clusterName, always_stop=True)
    local_node = System.get_my_machine_id()
    self._start_one_ex(local_node, daemon)
def _gatherlocalnodes(self, cluster):
    """
    gather all localnodes for all clusters
    """
    local_nodes = [System.get_my_machine_id()]  # cluster.listLocalNodes()
    CheckArakoonTlogMark._speak('Found local nodes {0}'.format(local_nodes))
    for node in local_nodes:
        self._localnodesfiles[node] = {'cluster': cluster}
def run_event_consumer():
    """
    Check whether to run the event consumer

    The consumer runs when at least one message queue endpoint is hosted on
    this machine's IP.
    """
    machine_id = System.get_my_machine_id()
    my_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
    endpoints = EtcdConfiguration.get('/ovs/framework/messagequeue|endpoints')
    return any(endpoint.startswith(my_ip) for endpoint in endpoints)
def create_cluster(cluster_name, ip): """ Creates a cluster :param base_dir: Base directory that should contain the data :param ip: IP address of the first node of the new cluster :param cluster_name: Name of the cluster """ logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip)) client = SSHClient(ip, username='******') node_name = System.get_my_machine_id(client) data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name) wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name) abs_paths = [data_dir, wal_dir] client.dir_delete(abs_paths) client.dir_create(abs_paths) client.dir_chmod(abs_paths, 0755, recursive=True) client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True) base_name = 'ovs-etcd' target_name = 'ovs-etcd-{0}'.format(cluster_name) ServiceManager.add_service( base_name, client, params={ 'CLUSTER': cluster_name, 'NODE_ID': node_name, 'DATA_DIR': data_dir, 'WAL_DIR': wal_dir, 'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip), 'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip), 'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'), 'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip)), 'INITIAL_STATE': 'new', 'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format( EtcdInstaller.SERVER_URL.format(ip)) }, target_name=target_name) EtcdInstaller.start(cluster_name, client) EtcdInstaller.wait_for_cluster(cluster_name, client) logger.debug('Creating cluster "{0}" on {1} completed'.format( cluster_name, ip))
def tick(self):
    """
    Runs one iteration of the scheduler. This is guarded with a distributed lock

    Only the node holding the distributed lock (a record in the persistent
    store) executes the scheduled workload; other nodes just wait.
    :return: Number of seconds until the next tick should run
    """
    self._has_lock = False
    try:
        logger.debug('DS executing tick')
        self._mutex.acquire(wait=10)
        # noinspection PyProtectedMember
        node_now = current_app._get_current_object().now()
        node_timestamp = time.mktime(node_now.timetuple())
        node_name = System.get_my_machine_id()
        try:
            lock = self._persistent.get('{0}_lock'.format(self._namespace))
        except KeyNotFoundException:
            lock = None
        if lock is None:
            # There is no lock yet, so the lock is acquired
            self._has_lock = True
            logger.debug('DS there was no lock in tick')
        else:
            if lock['name'] == node_name:
                # The current node holds the lock
                logger.debug('DS keeps own lock')
                self._has_lock = True
            elif node_timestamp - lock['timestamp'] > DistributedScheduler.TIMEOUT:
                # The current lock is timed out, so the lock is stolen
                logger.debug('DS last lock refresh is {0}s old'.format(
                    node_timestamp - lock['timestamp']))
                logger.debug(
                    'DS stealing lock from {0}'.format(lock['name']))
                # Reload the schedule before taking over from the previous holder
                self._load_schedule()
                self._has_lock = True
            else:
                # Another node holds a fresh lock
                logger.debug('DS lock is not ours')
        if self._has_lock is True:
            # Refresh the lock's timestamp so other nodes won't steal it
            lock = {'name': node_name, 'timestamp': node_timestamp}
            logger.debug('DS refreshing lock')
            self._persistent.set('{0}_lock'.format(self._namespace), lock)
    finally:
        self._mutex.release()
    if self._has_lock is True:
        logger.debug('DS executing tick workload')
        remaining_times = []
        try:
            for entry in self.schedule.itervalues():
                next_time_to_run = self.maybe_due(entry, self.publisher)
                if next_time_to_run:
                    remaining_times.append(next_time_to_run)
        except RuntimeError:
            # NOTE(review): presumably raised when the schedule is mutated while
            # iterating — this round is skipped and retried on the next tick; confirm
            pass
        logger.debug('DS executing tick workload - done')
        return min(remaining_times + [self.max_interval])
    else:
        return self.max_interval
def run_event_consumer():
    """
    Check whether to run the event consumer

    Returns True only when the rabbitmq client configuration defines a
    section for this machine.
    """
    parser = RawConfigParser()
    parser.read(os.path.join(Configuration.get('ovs.core.cfgdir'),
                             'rabbitmqclient.cfg'))
    local_machine = System.get_my_machine_id()
    return parser.has_section(local_machine)
def extend_cluster(master_ip, new_ip, cluster_name, base_dir):
    """
    Extends a cluster to a given new node
    :param base_dir: Base directory that will hold the db and tlogs
    :param cluster_name: Name of the cluster to be extended
    :param new_ip: IP address of the node to be added
    :param master_ip: IP of one of the already existing nodes
    """
    logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    root_dir = base_dir.rstrip('/')
    from ovs.extensions.generic.volatilemutex import VolatileMutex
    install_lock = VolatileMutex('arakoon_install_ports_{0}'.format(new_ip))
    cluster_config = ArakoonClusterConfig(cluster_name)
    cluster_config.load_config()
    new_node_client = SSHClient(new_ip)
    new_node_name = System.get_my_machine_id(new_node_client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)
    # Move any leftovers of a previous installation out of the way first
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, home_dir,
                                                   ArakoonInstaller.ARAKOON_BASE_DIR.format(root_dir), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, log_dir,
                                                   ArakoonInstaller.ARAKOON_LOG_DIR.format(''), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, tlog_dir,
                                                   ArakoonInstaller.ARAKOON_BASE_DIR.format(root_dir), cluster_name)
    try:
        # Serialize port allocation per node to prevent clashes with concurrent installs
        install_lock.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(new_node_client)
        if new_node_name not in [node.name for node in cluster_config.nodes]:
            cluster_config.nodes.append(ArakoonNodeConfig(name=new_node_name,
                                                          ip=new_ip,
                                                          client_port=ports[0],
                                                          messaging_port=ports[1],
                                                          log_dir=log_dir,
                                                          home=home_dir,
                                                          tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(cluster_config)
    finally:
        install_lock.release()
    logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}
def create_cluster(cluster_name, ip, base_dir, plugins=None, locked=True):
    """
    Creates a cluster
    :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
    :param plugins: Plugins that should be added to the configuration file
    :param base_dir: Base directory that should contain the data and tlogs
    :param ip: IP address of the first node of the new cluster
    :param cluster_name: Name of the cluster
    """
    logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
    base_dir = base_dir.rstrip('/')
    client = SSHClient(ip)
    node_name = System.get_my_machine_id(client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    # Move aside any data left behind by a previous arakoon installation
    ArakoonInstaller.archive_existing_arakoon_data(ip, home_dir,
                                                   ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(ip, log_dir,
                                                   ArakoonInstaller.ARAKOON_LOG_DIR.format(''), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(ip, tlog_dir,
                                                   ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    port_mutex = None
    try:
        if locked is True:
            # Local import to avoid a hard dependency at module load time
            from ovs.extensions.generic.volatilemutex import VolatileMutex
            # Per-node mutex serializing free-port allocation on this host
            port_mutex = VolatileMutex('arakoon_install_ports_{0}'.format(ip))
            port_mutex.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(client)
        config = ArakoonClusterConfig(cluster_name, plugins)
        config.nodes.append(ArakoonNodeConfig(name=node_name,
                                              ip=ip,
                                              client_port=ports[0],
                                              messaging_port=ports[1],
                                              log_dir=log_dir,
                                              home=home_dir,
                                              tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(config)
    finally:
        if port_mutex is not None:
            port_mutex.release()
    logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}
def run_event_consumer():
    """ Check whether to run the event consumer """
    # Run only when one of the configured message-queue endpoints lives on
    # this host's IP address.
    my_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(System.get_my_machine_id()))
    endpoints = EtcdConfiguration.get('/ovs/framework/messagequeue|endpoints')
    return any(endpoint.startswith(my_ip) for endpoint in endpoints)
class Helper(object):
    """ Helper module """
    MODULE = "utils"
    # Location of the healthcheck settings file, read once at class-definition time
    SETTINGS_LOC = "/opt/OpenvStorage/config/healthcheck/settings.json"
    # Name of PID 1 (e.g. 'init' or 'systemd'), used to detect the init manager
    RAW_INIT_MANAGER = str(subprocess.check_output('cat /proc/1/comm', shell=True)).strip()
    # StorageRouter object and machine id of the local node (resolved at import time)
    LOCAL_SR = System.get_my_storagerouter()
    LOCAL_ID = System.get_my_machine_id()
    # NOTE: the settings file is loaded when this module is first imported;
    # changes to the file require a re-import to take effect
    with open(SETTINGS_LOC) as settings_file:
        settings = json.load(settings_file)
    debug_mode = settings["healthcheck"]["debug_mode"]
    enable_logging = settings["healthcheck"]["logging"]["enable"]
    max_log_size = settings["healthcheck"]["max_check_log_size"]
    packages = settings["healthcheck"]["package_list"]
    extra_ports = settings["healthcheck"]["extra_ports"]
    rights_dirs = settings["healthcheck"]["rights_dirs"]
    owners_files = settings["healthcheck"]["owners_files"]
    max_hours_zero_disk_safety = settings["healthcheck"]["max_hours_zero_disk_safety"]

    @staticmethod
    def get_healthcheck_version():
        """
        Gets the installed healthcheck version
        :return: version number of the installed healthcheck
        :rtype: str
        """
        client = SSHClient(System.get_my_storagerouter())
        package_name = 'openvstorage-health-check'
        package_manager = PackageFactory.get_manager()
        packages = package_manager.get_installed_versions(client=client,
                                                          package_names=[package_name])
        # Fall back to 'unknown' when the package is not installed
        return packages.get(package_name, 'unknown')

    @staticmethod
    def get_local_settings():
        """
        Fetch settings of the local Open vStorage node
        :return: local settings of the node
        :rtype: dict
        """
        # Fetch all details
        local_settings = {'cluster_id': Configuration.get("/ovs/framework/cluster_id"),
                          'hostname': socket.gethostname(),
                          'storagerouter_id': Helper.LOCAL_ID,
                          'storagerouter_type': Helper.LOCAL_SR.node_type,
                          'environment os': ' '.join(platform.linux_distribution())}
        return local_settings
def get_path(binary_name):
    """
    Resolve the absolute path of a binary, caching the result in Etcd so
    later lookups avoid shelling out to 'which'.
    :param binary_name: Binary to get path for
    :return: Path, or None when the binary cannot be located
    """
    machine_id = System.get_my_machine_id()
    config_location = '/ovs/framework/hosts/{0}/paths|{1}'.format(machine_id, binary_name)
    path = EtcdConfiguration.get(config_location)
    if path:
        return path
    # Cache miss: resolve via 'which' and store for the next caller
    try:
        path = check_output('which {0}'.format(binary_name), shell=True).strip()
    except CalledProcessError:
        return None
    EtcdConfiguration.set(config_location, path)
    return path
def create_cluster(cluster_name, ip, base_dir, plugins=None, locked=True):
    """
    Creates a cluster
    :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
    :param plugins: Plugins that should be added to the configuration file
    :param base_dir: Base directory that should contain the data and tlogs
    :param ip: IP address of the first node of the new cluster
    :param cluster_name: Name of the cluster
    """
    logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
    base_dir = base_dir.rstrip('/')
    client = SSHClient(ip)
    # Idempotency: when the arakoon service already runs for this cluster,
    # return the ports of the existing node instead of deploying again
    if ArakoonInstaller.is_running(cluster_name, client):
        logger.info('Arakoon service running for cluster {0}'.format(cluster_name))
        config = ArakoonClusterConfig(cluster_name, plugins)
        config.load_config()
        for node in config.nodes:
            if node.ip == ip:
                return {'client_port': node.client_port,
                        'messaging_port': node.messaging_port}
    node_name = System.get_my_machine_id(client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    # Clean data from a previous installation
    # NOTE(review): the booleans presumably steer per-directory cleanup
    # behaviour — confirm against clean_leftover_arakoon_data's signature
    ArakoonInstaller.clean_leftover_arakoon_data(ip, {log_dir: True,
                                                      home_dir: False,
                                                      tlog_dir: False})
    port_mutex = None
    try:
        if locked is True:
            # Local import to avoid a hard dependency at module load time
            from ovs.extensions.generic.volatilemutex import VolatileMutex
            port_mutex = VolatileMutex('arakoon_install_ports_{0}'.format(ip))
            port_mutex.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(client)
        config = ArakoonClusterConfig(cluster_name, plugins)
        config.nodes.append(ArakoonNodeConfig(name=node_name,
                                              ip=ip,
                                              client_port=ports[0],
                                              messaging_port=ports[1],
                                              log_dir=log_dir,
                                              home=home_dir,
                                              tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(config)
    finally:
        if port_mutex is not None:
            port_mutex.release()
    logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}
def _process_task(task, metadata, servicemanager):
    """
    Processes a task
    :param task: Task identifier ('OPEN_TUNNEL', 'CLOSE_TUNNEL' or 'UPLOAD_LOGFILES')
    :param metadata: Task payload (files for OPEN_TUNNEL; filename/endpoint/credentials for UPLOAD_LOGFILES)
    :param servicemanager: Init system in use ('upstart', otherwise systemd commands are used)
    :raises RuntimeError: for unknown task identifiers
    """
    try:
        SupportAgent._logger.debug('Processing: {0}'.format(task))
        # Escape single quotes so the ids can be embedded safely inside the
        # single-quoted shell arguments below
        cid = Configuration.get('/ovs/framework/cluster_id').replace(r"'", r"'\''")
        nid = System.get_my_machine_id().replace(r"'", r"'\''")
        if task == 'OPEN_TUNNEL':
            # Stop any running tunnel, then rewrite the configuration files
            if servicemanager == 'upstart':
                check_output('service openvpn stop', shell=True)
            else:
                check_output("systemctl stop 'openvpn@ovs_{0}-{1}' || true".format(cid, nid), shell=True)
            check_output('rm -f /etc/openvpn/ovs_*', shell=True)
            # Files arrive base64-encoded from the support backend
            for filename, contents in metadata['files'].iteritems():
                with open(filename, 'w') as the_file:
                    the_file.write(base64.b64decode(contents))
            if servicemanager == 'upstart':
                check_output('service openvpn start', shell=True)
            else:
                check_output("systemctl start 'openvpn@ovs_{0}-{1}'".format(cid, nid), shell=True)
        elif task == 'CLOSE_TUNNEL':
            # Stop the tunnel and remove its configuration
            if servicemanager == 'upstart':
                check_output('service openvpn stop', shell=True)
            else:
                check_output("systemctl stop 'openvpn@ovs_{0}-{1}'".format(cid, nid), shell=True)
            check_output('rm -f /etc/openvpn/ovs_*', shell=True)
        elif task == 'UPLOAD_LOGFILES':
            # Collect the logs, upload them over FTP, then clean up locally
            logfile = check_output('ovs collect logs', shell=True).strip()
            check_output("mv '{0}' '/tmp/{1}'; curl -T '/tmp/{1}' 'ftp://{2}' --user '{3}:{4}'; rm -f '{0}' '/tmp/{1}'"
                         .format(logfile.replace(r"'", r"'\''"),
                                 metadata['filename'].replace(r"'", r"'\''"),
                                 metadata['endpoint'].replace(r"'", r"'\''"),
                                 metadata['user'].replace(r"'", r"'\''"),
                                 metadata['password'].replace(r"'", r"'\''")), shell=True)
        else:
            raise RuntimeError('Unknown task')
    except Exception, ex:
        # Log with full traceback, then re-raise for the caller to handle
        SupportAgent._logger.exception('Unexpected error while processing task {0} (data: {1}): {2}'.format(task, json.dumps(metadata), ex))
        raise
def extend_cluster(src_ip, tgt_ip, cluster_name, exclude_ports):
    """
    Join the node at tgt_ip to an existing arakoon cluster, using the
    configuration found on src_ip.
    :param src_ip: IP of a node already carrying the cluster config
    :param tgt_ip: IP of the node to add
    :param cluster_name: Name of the cluster to extend
    :param exclude_ports: Ports that must not be handed out
    :return: dict with the allocated 'client_port' and 'messaging_port'
    """
    installer = ArakoonInstaller()
    installer.load_config_from(cluster_name, src_ip)
    client = SSHClient.load(tgt_ip)
    node_id = System.get_my_machine_id(client)
    # Pick two free ports (client + messaging) within the configured range
    port_range = System.read_remote_config(client, 'ovs.ports.arakoon')
    available = System.get_free_ports(port_range, exclude_ports, 2, client)
    installer.create_dir_structure(client)
    installer.add_node_to_config(node_id, tgt_ip, available[0], available[1])
    installer.upload_config_for(cluster_name)
    return {'client_port': available[0], 'messaging_port': available[1]}
def create(self):
    """
    Prepares a new Storagedriver for a given vPool and Storagerouter
    :return: None
    :rtype: NoneType
    """
    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')

    machine_id = System.get_my_machine_id(client=self.sr_installer.root_client)
    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
    storagerouter = self.sr_installer.storagerouter
    # The mutex prevents two concurrent vpool additions on the same host from
    # claiming the same ports before the model reflects them
    with volatile_mutex('add_vpool_get_free_ports_{0}'.format(machine_id), wait=30):
        # Collect every port already claimed in the model by storagedrivers
        # (and their ALBA proxies) on this storagerouter
        model_ports_in_use = []
        for sd in StorageDriverList.get_storagedrivers():
            if sd.storagerouter_guid == storagerouter.guid:
                model_ports_in_use += sd.ports.values()
                for proxy in sd.alba_proxies:
                    model_ports_in_use.append(proxy.service.ports[0])
        # 4 fixed ports (management/xmlrpc/dtl/edge) + one per requested proxy
        ports = System.get_free_ports(selected_range=port_range,
                                      exclude=model_ports_in_use,
                                      amount=4 + self.sr_installer.requested_proxies,
                                      client=self.sr_installer.root_client)

        vpool = self.vp_installer.vpool
        vrouter_id = '{0}{1}'.format(vpool.name, machine_id)
        storagedriver = StorageDriver()
        storagedriver.name = vrouter_id.replace('_', ' ')
        storagedriver.ports = {'management': ports[0],
                               'xmlrpc': ports[1],
                               'dtl': ports[2],
                               'edge': ports[3]}
        storagedriver.vpool = vpool
        storagedriver.cluster_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
        storagedriver.storage_ip = self.storage_ip
        storagedriver.mountpoint = '/mnt/{0}'.format(vpool.name)
        storagedriver.description = storagedriver.name
        storagedriver.storagerouter = storagerouter
        storagedriver.storagedriver_id = vrouter_id
        storagedriver.save()

        # ALBA Proxies
        # Model one service + proxy object per requested ALBA proxy
        proxy_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_PROXY)
        for proxy_id in xrange(self.sr_installer.requested_proxies):
            service = Service()
            service.storagerouter = storagerouter
            service.ports = [ports[4 + proxy_id]]
            service.name = 'albaproxy_{0}_{1}'.format(vpool.name, proxy_id)
            service.type = proxy_service_type
            service.save()
            alba_proxy = AlbaProxy()
            alba_proxy.service = service
            alba_proxy.storagedriver = storagedriver
            alba_proxy.save()
    self.storagedriver = storagedriver
def run_event_consumer():
    """
    Check whether to run the event consumer

    The consumer runs only on machines listed in the 'nodes' entry of the
    'main' section of rabbitmqclient.cfg.
    :return: True when this machine's id is a configured RabbitMQ node
    :rtype: bool
    """
    rmq_ini = ConfigObj(os.path.join(Configuration.get('ovs.core.cfgdir'), 'rabbitmqclient.cfg'))
    # ConfigObj yields a plain string when only one node is configured;
    # normalize to a list before the membership test. isinstance (instead of
    # the original `type(...) == list`) also accepts list subclasses.
    rmq_nodes = rmq_ini.get('main')['nodes']
    if not isinstance(rmq_nodes, list):
        rmq_nodes = [rmq_nodes]
    return System.get_my_machine_id() in rmq_nodes
def create_cluster(cluster_name, ip, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
    """
    Creates a cluster
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param ip: IP address of the first node of the new cluster
    :type ip: str
    :param server_port: Port to be used by server
    :type server_port: int
    :param client_port: Port to be used by client
    :type client_port: int
    :return: None
    """
    EtcdInstaller._logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))
    client = SSHClient(ip, username='******')
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    # Idempotency: nothing to do when the service is already configured and up
    if ServiceManager.has_service(target_name, client) and ServiceManager.get_service_status(target_name, client) is True:
        EtcdInstaller._logger.info('Service {0} already configured and running'.format(target_name))
        return
    node_name = System.get_my_machine_id(client)
    data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
    abs_paths = [data_dir, wal_dir]
    # Start from a clean slate: recreate the data and WAL directories with the
    # permissions and ownership etcd expects
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)
    base_name = 'ovs-etcd'
    # Single-node bootstrap: this node is its own initial cluster
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip, server_port),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip, client_port),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                       'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip, server_port)),
                                       'INITIAL_STATE': 'new',
                                       'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip, server_port))},
                               target_name=target_name)
    EtcdInstaller.start(cluster_name, client)
    # Block until the cluster answers on its client port
    EtcdInstaller.wait_for_cluster(cluster_name, client, client_port=client_port)
    EtcdInstaller._logger.debug('Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))
def get_heartbeat_data(self):
    """ Returns heartbeat data """
    data = {'cid': EtcdConfiguration.get('/ovs/framework/cluster_id'),  # cluster id
            'nid': System.get_my_machine_id(),                          # node id
            'metadata': {},
            'errors': []}  # collection failures are reported, not raised
    try:
        # Versions
        data['metadata']['versions'] = PackageManager.get_versions()
    except Exception, ex:
        # Best effort: record the failure and keep the heartbeat alive
        data['errors'].append(str(ex))
    # NOTE(review): this chunk ends here without a visible return; presumably
    # more metadata is gathered and `data` is returned further down — confirm
    # against the full file.
def process_IN_DELETE(self, event):
    """ inotify handler: react to a file being deleted in a watched directory. """
    try:
        logger.debug('path: {0} - name: {1} - deleted'.format(event.path, event.name))
        if self._is_etc_watcher(event.path):
            # Mirror the deletion to the matching file on every vpool mount
            pattern = '/mnt/*/{0}/{1}'.format(System.get_my_machine_id(), event.name)
            for candidate in glob.glob(pattern):
                if os.path.isfile(candidate) and os.path.exists(candidate):
                    os.remove(candidate)
                    logger.info('File on vpool deleted: {0}'.format(candidate))
        if self._is_run_watcher(event.path):
            self.invalidate_vmachine_status(event.name)
    except Exception as exception:
        # The watcher must keep running, so log instead of propagating
        logger.error('Exception during process_IN_DELETE: {0}'.format(str(exception)), print_msg=True)
def get_heartbeat_data(self):
    """ Returns heartbeat data """
    data = {'cid': Configuration.get('/ovs/framework/cluster_id'),  # cluster id
            'nid': System.get_my_machine_id(),                      # node id
            'metadata': {},
            'errors': []}  # collection failures are reported, not raised
    try:
        # Versions
        manager = PackageFactory.get_manager()
        data['metadata']['versions'] = manager.get_installed_versions()  # Fallback to check_output
    except Exception, ex:
        # Best effort: record the failure and keep the heartbeat alive
        data['errors'].append(str(ex))
    # NOTE(review): this chunk ends here without a visible return; presumably
    # `data` is returned further down — confirm against the full file.
def extend_cluster(master_ip, new_ip, cluster_name, base_dir):
    """
    Extends a cluster to a given new node
    :param base_dir: Base directory that will hold the db and tlogs
    :param cluster_name: Name of the cluster to be extended
    :param new_ip: IP address of the node to be added
    :param master_ip: IP of one of the already existing nodes
    """
    logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    base_dir = base_dir.rstrip('/')
    # Imported locally to avoid a module-load-time dependency
    from ovs.extensions.generic.volatilemutex import VolatileMutex
    # Serializes port allocation for the target node
    port_mutex = VolatileMutex('arakoon_install_ports_{0}'.format(new_ip))
    config = ArakoonClusterConfig(cluster_name)
    config.load_config()
    client = SSHClient(new_ip)
    node_name = System.get_my_machine_id(client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    # Archive whatever a previous arakoon installation left behind
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, home_dir,
                                                   ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, log_dir,
                                                   ArakoonInstaller.ARAKOON_LOG_DIR.format(''), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, tlog_dir,
                                                   ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    try:
        port_mutex.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(client)
        # Skip appending when the node is already part of the cluster config
        if node_name not in [node.name for node in config.nodes]:
            config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                  ip=new_ip,
                                                  client_port=ports[0],
                                                  messaging_port=ports[1],
                                                  log_dir=log_dir,
                                                  home=home_dir,
                                                  tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(config)
    finally:
        port_mutex.release()
    logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}
def add_service(name, client, params=None, target_name=None, startup_dependency=None, delay_registration=False):
    """
    Add a service
    :param name: Template name of the service to add
    :type name: str
    :param client: Client on which to add the service
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param params: Additional information about the service
    :type params: dict or None
    :param target_name: Overrule default name of the service with this name
    :type target_name: str or None
    :param startup_dependency: Additional startup dependency
    :type startup_dependency: str or None
    :param delay_registration: Register the service parameters in the config management right away or not
    :type delay_registration: bool
    :return: Parameters used by the service
    :rtype: dict
    """
    if params is None:
        params = {}
    service_name = Systemd._get_name(name, client, '/opt/OpenvStorage/config/templates/systemd/')
    template_file = '/opt/OpenvStorage/config/templates/systemd/{0}.service'.format(service_name)
    if not client.file_exists(template_file):
        # Given template doesn't exist so we are probably using system init scripts
        return
    if target_name is not None:
        service_name = target_name
    params.update({'SERVICE_NAME': Toolbox.remove_prefix(service_name, 'ovs-'),
                   'STARTUP_DEPENDENCY': '' if startup_dependency is None else '{0}.service'.format(startup_dependency)})
    # Fill in the <KEY> placeholders of the template and install the unit file
    template_content = client.file_read(template_file)
    for key, value in params.iteritems():
        template_content = template_content.replace('<{0}>'.format(key), value)
    client.file_write('/lib/systemd/system/{0}.service'.format(service_name), template_content)
    try:
        client.run(['systemctl', 'daemon-reload'])
        client.run(['systemctl', 'enable', '{0}.service'.format(service_name)])
    except CalledProcessError as cpe:
        # Log with traceback, then surface the failure to the caller
        Systemd._logger.exception('Add {0}.service failed, {1}'.format(service_name, cpe.output))
        raise Exception('Add {0}.service failed, {1}'.format(service_name, cpe.output))
    if delay_registration is False:
        # Persist the service parameters in the config management immediately
        Systemd.register_service(service_metadata=params, node_name=System.get_my_machine_id(client))
    return params
def get_heartbeat_data(self):
    """ Returns heartbeat data """
    data = {'cid': EtcdConfiguration.get('/ovs/framework/cluster_id'),  # cluster id
            'nid': System.get_my_machine_id(),                          # node id
            'metadata': {},
            'errors': []}  # collection failures are reported, not raised
    try:
        # Versions
        data['metadata']['versions'] = PackageManager.get_versions()
    except Exception, ex:
        # Best effort: record the failure and keep the heartbeat alive
        data['errors'].append(str(ex))
    # NOTE(review): this chunk ends here without a visible return; presumably
    # `data` is returned further down — confirm against the full file.
def get_path(binary_name):
    """
    Retrieve the absolute path for binary
    :param binary_name: Binary to get path for
    :return: Path, or None when the binary cannot be resolved
    """
    machine_id = System.get_my_machine_id()
    key = '/ovs/framework/hosts/{0}/paths|{1}'.format(machine_id, binary_name)
    if EtcdConfiguration.exists(key):
        return EtcdConfiguration.get(key)
    # Not cached yet: resolve via 'which' and store the result for next time
    try:
        resolved = check_output('which {0}'.format(binary_name), shell=True).strip()
    except CalledProcessError:
        return None
    EtcdConfiguration.set(key, resolved)
    return resolved
def _get_free_ports(client):
    """
    Find two free ports (client + messaging) on the node behind `client`.

    Ports already claimed by arakoon nodes configured on this machine are
    excluded from the search.
    :param client: SSHClient to the node to scan
    :return: list with [client_port, messaging_port]
    """
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    if Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
        for cluster_name in Configuration.list(ArakoonInstaller.CONFIG_ROOT):
            try:
                config = ArakoonClusterConfig(cluster_name, False)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)
            except Exception:
                # A single unreadable cluster config must not abort port
                # discovery for all other clusters (mirrors the sibling
                # etcd-based implementation); log with traceback and continue
                ArakoonInstaller._logger.exception(' Could not load port information of cluster {0}'.format(cluster_name))
    ports = System.get_free_ports(Configuration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)),
                                  exclude_ports, 2, client)
    ArakoonInstaller._logger.debug(' Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports
def configure_volumerouter(self, vrouter_cluster, vrouter_config):
    """
    Configures storage driver
    :param vrouter_cluster: Name of the volume router cluster this node joins
    :param vrouter_config: dictionary of key/value pairs
    """
    unique_machine_id = System.get_my_machine_id()
    self.load_config()
    # Only apply the settings when they target this node's vrouter instance
    # (indexed placeholders instead of bare '{}{}' for py2.6 compatibility
    # and consistency with the rest of the file)
    if vrouter_config['vrouter_id'] == '{0}{1}'.format(self._vpool, unique_machine_id):
        for key, value in vrouter_config.iteritems():
            self._config_file_content['volume_router'][key] = value
        # Configure the vrouter arakoon with empty values in order to use tokyo cabinet
        self._config_file_content['volume_router']['vrouter_arakoon_cluster_id'] = ''
        self._config_file_content['volume_router']['vrouter_arakoon_cluster_nodes'] = []
        # `x not in y` instead of the original `not x in y`
        if 'volume_router_cluster' not in self._config_file_content:
            self._config_file_content['volume_router_cluster'] = {}
        self._config_file_content['volume_router_cluster'].update({'vrouter_cluster_id': vrouter_cluster})
        self.write_config()
def create_config(self, cluster_name, ip, client_port, messaging_port, plugins=None):
    """
    Creates initial config object causing this host to be master
    :param cluster_name: unique name for this arakoon cluster used in paths
    :param ip: ip on which service should listen
    :param client_port: port for client connections
    :param messaging_port: port for inter-node messaging
    :param plugins: optional arakoon plugins
    :return: None
    """
    ssh_client = SSHClient.load(ip)
    master_name = System.get_my_machine_id(ssh_client)
    arakoon_base = System.read_remote_config(ssh_client, 'ovs.core.db.arakoon.location')
    # Drop any previous configuration before building the new single-node one
    self.clear_config()
    self.config = ClusterConfig(arakoon_base, cluster_name, 'info', plugins)
    self.config.nodes.append(ClusterNode(master_name, ip, client_port, messaging_port))
    self.config.target_ip = ip
def add_services(client, node_type, logger):
    """
    Add the services required by the OVS cluster
    :param client: Client on which to add the services
    :type client: ovs_extensions.generic.sshclient.SSHClient
    :param node_type: Type of node ('master' or 'extra')
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.extensions.generic.logger.Logger
    :return: None
    """
    Toolbox.log(logger=logger, messages='Adding services')
    service_manager = ServiceFactory.get_manager()
    worker_queue = System.get_my_machine_id(client=client)
    if node_type == 'master':
        # Masters additionally consume the shared 'ovs_masters' queue
        worker_queue += ',ovs_masters'
    # Services every node runs
    services = {'workers': {'WORKER_QUEUE': worker_queue},
                'watcher-framework': {}}
    if node_type == 'master':
        # Master-only services
        services.update({'memcached': {'MEMCACHE_NODE_IP': client.ip,
                                       'WORKER_QUEUE': worker_queue},
                         'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip,
                                             'WORKER_QUEUE': worker_queue},
                         'scheduled-tasks': {},
                         'webapp-api': {},
                         'volumerouter-consumer': {}})
    for service_name, params in services.iteritems():
        if not service_manager.has_service(service_name, client):
            Toolbox.log(logger=logger, messages='Adding service {0}'.format(service_name))
            service_manager.add_service(name=service_name, params=params, client=client)
def up_and_running(mountpoint, storagedriver_id):
    """
    Volumedriver informs us that the service is completely started. Post-start events can be executed
    :param mountpoint: Mountpoint to check
    :param storagedriver_id: ID of the storagedriver
    """
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if storagedriver is None:
        raise RuntimeError('A Storage Driver with id {0} could not be found.'.format(storagedriver_id))
    storagedriver.startup_counter += 1
    storagedriver.save()
    # Guard clause: post-start NFS handling only applies to VMware hosts
    if storagedriver.storagerouter.pmachine.hvtype != 'VMWARE':
        return
    client = SSHClient(storagedriver.storagerouter)
    machine_id = System.get_my_machine_id(client)
    vmware_mode = EtcdConfiguration.get('/ovs/framework/hosts/{0}/storagedriver|vmware_mode'.format(machine_id))
    if vmware_mode == 'classic':
        # Re-export the mountpoint over NFS now that the volumedriver is up
        exports = Nfsexports()
        exports.unexport(mountpoint)
        exports.export(mountpoint)
        exports.trigger_rpc_mountd()
def _get_free_ports(client):
    """
    Find two free ports (client + messaging) on the node behind `client`.

    Ports already claimed by arakoon nodes configured on this machine are
    excluded from the search.
    :param client: SSHClient to the node to scan
    :return: list with [client_port, messaging_port]
    """
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
        for cluster_name in EtcdConfiguration.list(ArakoonInstaller.ETCD_CONFIG_ROOT):
            try:
                config = ArakoonClusterConfig(cluster_name)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)
            except Exception:
                # `except Exception` instead of the original bare `except:`,
                # which also swallowed SystemExit/KeyboardInterrupt;
                # logger.exception keeps the traceback for diagnosis
                logger.exception(' Could not load port information of cluster {0}'.format(cluster_name))
    ports = System.get_free_ports(EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)),
                                  exclude_ports, 2, client)
    logger.debug(' Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports
def get_path(binary_name):
    """
    Retrieve the absolute path for binary
    :param binary_name: Binary to get path for
    :return: Path, or None when the binary cannot be resolved
    """
    location = '/ovs/framework/hosts/{0}/paths|{1}'.format(System.get_my_machine_id(), binary_name)
    if not EtcdConfiguration.exists(location):
        # First lookup for this binary on this host: ask the shell, then cache
        try:
            binary_path = check_output('which {0}'.format(binary_name), shell=True).strip()
        except CalledProcessError:
            return None
        EtcdConfiguration.set(location, binary_path)
        return binary_path
    return EtcdConfiguration.get(location)