def load_schedule(self):
    """Run the scheduler for the first time and sync placement to Zookeeper.

    For each member of the cell, the children of its placement node are
    reconciled with the apps the scheduler assigned to that server:
    children that should not be there are deleted, missing ones are
    created and their task is updated. The full placement is then stored
    and ``self.up_to_date`` is set.
    """
    placement = self.cell.schedule()

    for servername, server in self.cell.members().items():
        server_node = z.path.placement(servername)
        zkutils.ensure_exists(self.zkclient, server_node,
                              acl=[_SERVERS_ACL])

        in_zk = set(self.zkclient.get_children(server_node))
        expected = set(server.apps.keys())
        stale = in_zk - expected
        missing = expected - in_zk

        # Remove placements present in Zookeeper but not in the schedule.
        for app in stale:
            _LOGGER.info('Unscheduling: %s - %s', servername, app)
            zkutils.ensure_deleted(self.zkclient,
                                   os.path.join(server_node, app))

        # Create placements the schedule asks for but Zookeeper lacks.
        for app in missing:
            _LOGGER.info('Scheduling: %s - %s,%s',
                         servername, app, self.cell.apps[app].identity)
            app_data = self._placement_data(app)
            zkutils.put(self.zkclient,
                        os.path.join(server_node, app),
                        app_data,
                        acl=[_SERVERS_ACL])
            self._update_task(app, servername, why=None)

    # Keep the most recent placement around as a reference.
    zkutils.put(self.zkclient, z.path.placement(), placement)
    self.up_to_date = True
def test_put(self):
    """Check that put() creates the node with the expected arguments."""
    zk = kazoo.client.KazooClient()

    zkutils.put(zk, '/foo/bar')

    # With no data given, the node is expected to be created with an
    # empty bytes payload.
    expected_kwargs = {
        'acl': mock.ANY,
        'makepath': True,
        'sequence': False,
        'ephemeral': False,
    }
    create_mock = kazoo.client.KazooClient.create
    create_mock.assert_called_with('/foo/bar', b'', **expected_kwargs)
def _node_initialize(tm_env, runtime, zkclient, hostname,
                     zk_server_path, zk_presence_path):
    """Initialize the node; should only be done on a cold start.

    Collects local node information (including detected traits), merges
    it into the data stored at ``zk_server_path``, writes the result back,
    creates the ephemeral presence node and, on POSIX hosts, initializes
    iptables. Any exception is logged and the Zookeeper client is stopped.
    """
    try:
        local_info = sysinfo.node_info(tm_env, runtime)

        known_traits = zkutils.get(zkclient, z.path.traits())
        local_info['traits'] = traits.detect(known_traits)

        # Overlay the locally collected info onto the scheduler data
        # already stored for this server.
        merged_info = zkutils.get(zkclient, zk_server_path)
        merged_info.update(local_info)

        _LOGGER.info('Registering node: %s: %s, %r',
                     zk_server_path, hostname, merged_info)
        zkutils.update(zkclient, zk_server_path, merged_info)

        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)

        zkutils.put(zkclient,
                    zk_presence_path,
                    {'seen': False},
                    acl=[presence_acl],
                    ephemeral=True)

        # TODO: Fix the network initialization. Then the below can be part
        #       of appenv.initialize()
        if os.name == 'posix':
            # Flush all rules in iptables nat and mangle tables (it is
            # assumed that none but Treadmill manages these tables) and
            # bulk load all the Treadmill static rules
            external_ip = merged_info['network']['external_ip']
            iptables.initialize(external_ip)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
def create_endpoint_file(approot, port, appname, endpoint):
    """Register the endpoint in Zookeeper and create its local spec file."""
    hostport = '%s:%s' % (sysinfo.hostname(), port)
    zkclient = context.GLOBAL.zk.conn

    proid_path = z.path.endpoint_proid(appname)
    servers_acl = zkclient.make_servers_acl()
    _LOGGER.info('Ensuring %s exists with ACL %r', proid_path, servers_acl)
    zkutils.ensure_exists(zkclient, proid_path, acl=[servers_acl])

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    zkutils.ensure_deleted(zkclient, endpoint_path)
    time.sleep(5)
    zkutils.put(zkclient, endpoint_path, hostport)

    # Replace any existing local endpoint links with a fresh spec owned
    # by the current process.
    tm_env = appenv.AppEnvironment(approot)
    manager = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    manager.unlink_all(appname=appname, endpoint=endpoint, proto='tcp')

    pid = os.getpid()
    manager.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=pid,
        port=port,
        owner='/proc/{}'.format(pid),
    )
def _node_initialize(tm_env, runtime, zkclient, hostname,
                     zk_server_path, zk_presence_path):
    """Initialize the node; should only be done on a cold start.

    Merges locally collected node information into the data stored at
    ``zk_server_path``, writes it back, creates the ephemeral presence
    node and runs the local environment initialization. Any exception is
    logged and the Zookeeper client is stopped.
    """
    try:
        local_info = sysinfo.node_info(tm_env, runtime)

        # Overlay the locally collected info onto the scheduler data
        # already stored for this server.
        merged_info = zkutils.get(zkclient, zk_server_path)
        merged_info.update(local_info)

        _LOGGER.info('Registering node: %s: %s, %r',
                     zk_server_path, hostname, merged_info)
        zkutils.update(zkclient, zk_server_path, merged_info)

        presence_acl = zkutils.make_host_acl(hostname, 'rwcda')
        _LOGGER.debug('host_acl: %r', presence_acl)

        zkutils.put(zkclient,
                    zk_presence_path,
                    {'seen': False},
                    acl=[presence_acl],
                    ephemeral=True)

        # Hand the merged info to the local node initialization.
        tm_env.initialize(merged_info)

    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Node initialization failed')
        zkclient.stop()
def _save_appgroup_lookup(zkclient, db_file, proid, digest):
    """Store the content of ``db_file`` as the appgroup lookup node.

    The file is read in binary mode and written to the node for
    ``(proid, digest)``; extra lookup nodes are removed afterwards.
    """
    with io.open(db_file, 'rb') as db:
        lookup_node = z.path.appgroup_lookup(proid, digest)
        payload = db.read()

    zkutils.put(zkclient, lookup_node, payload)
    _remove_extra_appgroup_lookup(zkclient, proid, digest)
def create_bucket(zkclient, bucket_id, parent_id, traits=0):
    """Write the bucket definition to Zookeeper and queue a buckets event."""
    bucket_node = z.path.bucket(bucket_id)
    zkutils.put(
        zkclient,
        bucket_node,
        {'traits': traits, 'parent': parent_id},
        check_content=True,
    )
    create_event(zkclient, 0, 'buckets', None)
def _save_version(zkclient, hostname, version):
    """Prepend ``version`` to the host's version history in Zookeeper.

    Only the first ``_MAX_VERSIONS`` entries are written back.
    """
    history_node = z.path.version_history(hostname)

    # Start from an empty history when nothing usable is stored yet.
    history = zkutils.get_default(zkclient, history_node) or []
    history.insert(0, version)

    zkutils.put(zkclient, history_node, history[:_MAX_VERSIONS])
def update_appmonitor(zkclient, monitor_id, count):
    """Write the app monitor configuration and return it.

    The returned dict is the data that was written, with ``_id`` set to
    ``monitor_id``.
    """
    monitor = {'count': count}
    zkutils.put(zkclient, z.path.appmonitor(monitor_id), monitor,
                check_content=True)

    # The written data is returned as-is rather than read back; with
    # check_content=True it is trusted to match what is stored.
    monitor['_id'] = monitor_id
    return monitor
def _sync_allocations(zkclient, allocations):
    """Synchronize allocations to Zookeeper.

    Each allocation gets a ``name`` key holding the part of its ``_id``
    before the last ``/``; the allocation dicts are modified in place.
    """
    synced = []
    for allocation in allocations:
        _LOGGER.info('Sync allocation: %s', allocation)

        # The part after the last '/' is not used.
        alloc_name, _ = allocation['_id'].rsplit('/', 1)
        allocation['name'] = alloc_name
        synced.append(allocation)

    allocation_node = z.path.allocation()
    zkutils.put(zkclient, allocation_node, synced, check_content=True)
def _publish(self):
    """Write the sorted discovery state to an ephemeral Zookeeper node."""
    _LOGGER.info('Publishing discovery info')

    sorted_state = sorted(self.state)

    # The node is named after the host, qualified by the instance if set.
    node_name = (
        '#'.join([self.hostname, self.instance])
        if self.instance
        else self.hostname
    )

    zkutils.put(
        self.zkclient,
        z.path.discovery(node_name),
        sorted_state,
        ephemeral=True,
        acl=[self.node_acl],
    )
def _publish(self, result):
    """Write ``result`` to the ephemeral discovery state node."""
    # The node is named after the host, qualified by the instance if set.
    node_name = (
        '#'.join([self.hostname, self.instance])
        if self.instance
        else self.hostname
    )

    zkutils.put(
        self.zkclient,
        z.path.discovery_state(node_name),
        result,
        ephemeral=True,
        acl=[self.node_acl],
    )
def test_put_check_content(self):
    """Check put() with check_content=True only sets changed content."""
    zk_cls = treadmill.zkutils.ZkClient

    # Pretend the node already exists and currently holds b'aaa'.
    zk_cls.create.side_effect = kazoo.client.NodeExistsError
    zk_cls.get.return_value = (b'aaa', {})

    zkclient = zk_cls()

    # Same content: nothing must be written.
    zkutils.put(zkclient, '/a', 'aaa', check_content=True)
    self.assertFalse(zk_cls.set.called)

    # Different content: the new value must be set.
    zkutils.put(zkclient, '/a', 'bbb', check_content=True)
    zk_cls.set.assert_called_with('/a', b'bbb')
def _register_endpoint(zkclient, port):
    """Register the policy server endpoint as an ephemeral node.

    A node already present under the same ``host:port`` name is deleted
    before the new one is created.
    """
    host = sysinfo.hostname()
    zkclient.ensure_path(z.path.warpgate())

    endpoint_node = z.path.warpgate('%s:%s' % (host, port))
    _LOGGER.info('registering locker: %s', endpoint_node)

    already_there = zkclient.exists(endpoint_node)
    if already_there:
        _LOGGER.info('removing previous node %s', endpoint_node)
        zkutils.ensure_deleted(zkclient, endpoint_node)

    zkutils.put(zkclient, endpoint_node, {}, acl=None, ephemeral=True)
def register_endpoint(self, port):
    """Register the ticket locker endpoint as an ephemeral node.

    A node already present under the same ``host:port`` name is deleted
    before the new one is created.
    """
    host = sysinfo.hostname()
    self.zkclient.ensure_path(z.TICKET_LOCKER)

    locker_node = z.path.ticket_locker('%s:%s' % (host, port))
    _LOGGER.info('registering locker: %s', locker_node)

    already_there = self.zkclient.exists(locker_node)
    if already_there:
        _LOGGER.info('removing previous node %s', locker_node)
        zkutils.ensure_deleted(self.zkclient, locker_node)

    zkutils.put(self.zkclient, locker_node, {}, acl=None, ephemeral=True)
def test_put_existing(self):
    """Check that put() on an existing node sets content and ACLs."""
    zk_cls = treadmill.zkutils.ZkClient
    client = zk_cls()

    # Make every create() call fail as if the node were already there.
    zk_cls.create.side_effect = kazoo.client.NodeExistsError

    zkutils.put(client, '/foo/bar')

    zk_cls.set.assert_called_with('/foo/bar', b'')
    zk_cls.set_acls.assert_called_with('/foo/bar', mock.ANY)
def test_put_existing(self):
    """Check that put() on an existing node sets content and ACLs."""

    def _node_exists(*_args, **_kwargs):
        """Side effect for zk.create: the node is already there."""
        raise kazoo.client.NodeExistsError()

    zk_cls = kazoo.client.KazooClient
    client = zk_cls()
    zk_cls.create.side_effect = _node_exists

    zkutils.put(client, '/foo/bar')

    zk_cls.set.assert_called_with('/foo/bar', '')
    zk_cls.set_acls.assert_called_with('/foo/bar', mock.ANY)
def reschedule(self):
    """Run the scheduler and bring Zookeeper placement in line with it.

    Only placement records whose server or expiration changed are acted
    upon. Old placements are removed first, then new ones are created and
    the corresponding tasks updated. Finally the full placement is stored
    and ``self.up_to_date`` is set.
    """
    placement = self.cell.schedule()

    # Keep only the records where something actually changed.
    changed = []
    for app, before, exp_before, after, exp_after in placement:
        if before == after and exp_before == exp_after:
            continue
        changed.append((app, before, exp_before, after, exp_after))

    # Two passes: every old placement is removed before any new one is
    # created. If the loop is interrupted for any reason (like Zookeeper
    # connection lost or master restart) there are no duplicate
    # placements.
    for app, before, _exp_before, after, _exp_after in changed:
        if before and before != after:
            _LOGGER.info('Unscheduling: %s - %s', before, app)
            zkutils.ensure_deleted(self.zkclient,
                                   z.path.placement(before, app))

    for app, before, _exp_before, after, exp_after in changed:
        placement_data = self._placement_data(app)

        # Work out why the app left its previous server, if it had one.
        if before is None:
            why = ''
        elif (before not in self.servers or
              self.servers[before].state == scheduler.State.down):
            why = '{server}:down'.format(server=before)
        else:
            # TODO: it will be nice to put app utilization at the time
            #       of eviction, but this info is not readily
            #       available yet in the scheduler.
            why = 'evicted'

        if not after:
            self._update_task(app, None, why=why)
            continue

        _LOGGER.info('Scheduling: %s - %s,%s, expires at: %s',
                     after,
                     app,
                     self.cell.apps[app].identity,
                     exp_after)
        zkutils.put(self.zkclient,
                    z.path.placement(after, app),
                    placement_data,
                    acl=[_SERVERS_ACL])
        self._update_task(app, after, why=why)

    self._unschedule_evicted()

    # Keep the most recent placement around as a reference.
    zkutils.put(self.zkclient, z.path.placement(), placement)
    self.up_to_date = True
def update_allocations(zkclient, allocations):
    """Write allocations to Zookeeper; queue an event if put() reports so.

    An ``allocations`` event is created only when ``zkutils.put`` returns
    a truthy value.
    """
    written = zkutils.put(zkclient, z.path.allocation(), allocations,
                          check_content=True)
    if not written:
        return

    create_event(zkclient, 0, 'allocations', None)
def _sync_partitions(zkclient, entities):
    """Sync partition entities to Zookeeper.

    Partition nodes with no matching entity ``_id`` are deleted; every
    entity is then written to its own node, with ``reboot-schedule``
    parsed when present and valid.
    """
    partitions_root = z.path.partition()
    _LOGGER.info('Sync: %s', partitions_root)

    zkclient.ensure_path(partitions_root)
    existing = zkclient.get_children(partitions_root)

    wanted = {entity['_id'] for entity in entities}
    for stale in set(existing) - wanted:
        _LOGGER.debug('Delete: %s', stale)
        zkutils.ensure_deleted(zkclient, z.path.partition(stale))

    # Add or update current partitions
    for entity in entities:
        zkname = entity['_id']

        if 'reboot-schedule' in entity:
            try:
                parsed = utils.reboot_schedule(entity['reboot-schedule'])
                entity['reboot-schedule'] = parsed
            except ValueError:
                # The unparsed value is left in place.
                _LOGGER.info('Invalid reboot schedule, ignoring.')

        changed = zkutils.put(zkclient, z.path.partition(zkname), entity,
                              check_content=True)
        if changed:
            _LOGGER.info('Update: %s', zkname)
        else:
            _LOGGER.info('Up to date: %s', zkname)
def create_apps(zkclient, app_id, app, count):
    """Create ``count`` app instances and return their instance ids.

    Every instance is a sequence node; its id is the basename of the
    created node path. A "pending" trace node is created for each
    instance, and an already existing trace node is tolerated.
    """
    role_acl = zkutils.make_role_acl('servers', 'rwcd')
    created_ids = []

    for _ in range(count):
        app_node = zkutils.put(zkclient,
                               _app_node(app_id, existing=False),
                               app,
                               sequence=True,
                               acl=[role_acl])
        instance_id = os.path.basename(app_node)

        # Create task for the app, and put it in pending state.
        # TODO: probably need to create PendingEvent and use to_data
        #       method.
        pending_event = '{time},{hostname},pending,{data}'.format(
            time=time.time(),
            hostname=sysinfo.hostname(),
            data='created',
        )
        trace_node = z.path.trace(instance_id, pending_event)

        try:
            zkclient.create(trace_node, b'', acl=[_SERVERS_ACL],
                            makepath=True)
        except kazoo.client.NodeExistsError:
            # The trace node is already there; nothing more to do.
            pass

        created_ids.append(instance_id)

    return created_ids
def create_server(zkclient, server_id, parent_id):
    """Create or update the server definition in Zookeeper.

    The server node is ensured to exist, ``parent`` is set in its data
    when ``parent_id`` is given, and a ``servers`` event is queued when
    ``zkutils.put`` returns a truthy value.
    """
    node = z.path.server(server_id)
    host_acl = zkutils.make_host_acl(server_id, 'rwcd')

    zkutils.ensure_exists(zkclient, node, acl=[host_acl])

    # zkutils.get return dict/tuple if need_metadata is true.
    #
    # pylint: disable=R0204
    data = zkutils.get(zkclient, node)
    if parent_id:
        if data:
            data['parent'] = parent_id
        else:
            data = {'parent': parent_id}

    _LOGGER.info('Creating server node %s with data %r and ACL %r',
                 node, data, host_acl)

    changed = zkutils.put(zkclient, node, data,
                          acl=[host_acl], check_content=True)
    if changed:
        create_event(zkclient, 0, 'servers', [server_id])
def sync_partitions():
    """Sync the cell's partitions from the admin API to Zookeeper.

    Partition nodes with no matching partition ``_id`` are deleted; every
    partition is then written to its own node, with ``reboot-schedule``
    parsed when present and valid.
    """
    _LOGGER.info('Sync: partitions.')

    zkclient = context.GLOBAL.zk.conn

    cell_admin = admin.Cell(context.GLOBAL.ldap.conn)
    partitions = cell_admin.partitions(context.GLOBAL.cell)

    zkclient.ensure_path(z.path.partition())
    existing = zkclient.get_children(z.path.partition())

    wanted = {partition['_id'] for partition in partitions}
    for stale in set(existing) - wanted:
        _LOGGER.debug('Delete: %s', stale)
        zkutils.ensure_deleted(zkclient, z.path.partition(stale))

    # Add or update current partitions
    for partition in partitions:
        zkname = partition['_id']

        if 'reboot-schedule' in partition:
            try:
                parsed = utils.reboot_schedule(partition['reboot-schedule'])
                partition['reboot-schedule'] = parsed
            except ValueError:
                # The unparsed value is left in place.
                _LOGGER.info('Invalid reboot schedule, ignoring.')

        changed = zkutils.put(zkclient, z.path.partition(zkname),
                              partition, check_content=True)
        if changed:
            _LOGGER.info('Update: %s', zkname)
        else:
            _LOGGER.info('Up to date: %s', zkname)
def _sync_collection(zkclient, entities, zkpath, match=None):
    """Sync an ldap collection to the children of ``zkpath``.

    Children with no matching entity ``_id`` are deleted. Each entity is
    written under its name, or under the name returned by
    ``match(name, entity)`` when ``match`` is given; entities for which
    ``match`` returns a falsy value are skipped.
    """
    _LOGGER.info('Sync: %s', zkpath)

    zkclient.ensure_path(zkpath)
    existing = zkclient.get_children(zkpath)

    # Capture the ids before they are removed from the entities.
    names = [entity['_id'] for entity in entities]
    for entity in entities:
        _remove_id(entity)

    for stale in set(existing) - set(names):
        _LOGGER.debug('Delete: %s', stale)
        zkutils.ensure_deleted(zkclient,
                               z.join_zookeeper_path(zkpath, stale))

    # Add or update current app-groups
    for name, entity in zip(names, entities):
        if match:
            zkname = match(name, entity)
            if not zkname:
                _LOGGER.debug('Skip: %s', name)
                continue
        else:
            zkname = name

        changed = zkutils.put(zkclient,
                              z.join_zookeeper_path(zkpath, zkname),
                              entity,
                              check_content=True)
        if changed:
            _LOGGER.info('Update: %s', zkname)
        else:
            _LOGGER.info('Up to date: %s', zkname)
def _sync_collection(zkclient, entities, zkpath, match=None):
    """Sync an ldap collection to the children of ``zkpath``.

    ``_id`` is popped from every entity and used as the node name.
    Entities rejected by ``match(entity)`` are skipped; children of
    ``zkpath`` that are not among the remaining names are deleted, and the
    remaining entities are written.
    """
    _LOGGER.info('Sync: %s', zkpath)

    zkclient.ensure_path(zkpath)
    existing = zkclient.get_children(zkpath)

    selected = {}
    for entity in entities:
        name = entity.pop('_id')
        if match and not match(entity):
            _LOGGER.debug('Skip: %s', name)
        else:
            selected[name] = entity

    for stale in set(existing) - set(selected):
        _LOGGER.info('Delete: %s', stale)
        zkutils.ensure_deleted(zkclient,
                               z.join_zookeeper_path(zkpath, stale))

    # Add or update current app-groups
    for name, entity in selected.items():
        changed = zkutils.put(zkclient,
                              z.join_zookeeper_path(zkpath, name),
                              entity,
                              check_content=True)
        if changed:
            _LOGGER.info('Update: %s', name)
        else:
            _LOGGER.info('Up to date: %s', name)
def update_appmonitor(zkclient, monitor_id, count, policy=None):
    """Update the app monitor configuration and return it.

    Starts from the monitor's current data (or an empty dict when there
    is none) and overrides ``count`` and ``policy`` only when they are not
    None. The returned dict is the data that was written, with ``_id`` set
    to ``monitor_id``.
    """
    monitor = get_appmonitor(zkclient, monitor_id)
    if monitor is None:
        monitor = {}

    # None means "leave this setting unchanged".
    for key, value in (('count', count), ('policy', policy)):
        if value is not None:
            monitor[key] = value

    zkutils.put(zkclient, z.path.appmonitor(monitor_id), monitor,
                check_content=True)

    # The written data is returned as-is rather than read back; with
    # check_content=True it is trusted to match what is stored.
    monitor['_id'] = monitor_id
    return monitor
def create_event(zkclient, priority, event, payload):
    """Place an event on the event queue and return its node name.

    The event is a sequence node whose name starts with the zero-padded
    priority and the event name; the basename of the created path is
    returned. ``priority`` must be between 0 and 100 inclusive.
    """
    assert 0 <= priority <= 100

    prefix = '%(priority)03d-%(event)s-' % {
        'priority': priority,
        'event': event,
    }
    created_path = zkutils.put(zkclient,
                               z.path.event(prefix),
                               payload,
                               acl=[_SERVERS_ACL],
                               sequence=True)
    return os.path.basename(created_path)
def update_identity_group(zkclient, ident_group_id, count):
    """Write the identity group count; queue an event if put() reports so.

    An ``identity_groups`` event is created only when ``zkutils.put``
    returns a truthy value.
    """
    group_node = z.path.identity_group(ident_group_id)
    changed = zkutils.put(zkclient,
                          group_node,
                          {'count': count},
                          check_content=True,
                          acl=[zkclient.make_servers_acl()])
    if not changed:
        return

    create_event(zkclient, 0, 'identity_groups', [ident_group_id])
def accept(tkt_spool_dir, port, appname, endpoint, use_v2):
    """Run ticket locker acceptor.

    Registers ``hostname:port`` as the ``tcp`` endpoint of ``appname`` in
    Zookeeper, stops the Zookeeper connection and then executes the
    ticket receiver.

    :param tkt_spool_dir: directory passed to the ticket receiver.
    :param port: port to register; ``0`` means pick a free port.
    :param appname: application name the endpoint is registered under.
    :param endpoint: endpoint name.
    :param use_v2: run ``tkt_recv_v2`` instead of ``tkt_recv`` when true.
    """
    if port == 0:
        # Bind to port 0 to learn a free port number. The socket is
        # always closed, even when bind()/getsockname() fails, so the
        # descriptor cannot leak.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('0.0.0.0', 0))
            port = sock.getsockname()[1]
        finally:
            sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    _LOGGER.info('Ensuring %s exists with ACL %r',
                 endpoint_proid_path, _SERVERS_ACL)
    zkutils.ensure_exists(context.GLOBAL.zk.conn,
                          endpoint_proid_path,
                          acl=[_SERVERS_ACL])

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    # Exec into tickets acceptor. If race condition will not allow it to
    # bind to the provided port, it will exit and registration will
    # happen again.
    if use_v2:
        subproc.safe_exec([
            'tkt_recv_v2',
            '-p{}'.format(port),
            '-d{}'.format(tkt_spool_dir),
        ])
    else:
        subproc.safe_exec(
            ['tkt_recv', 'tcp://*:{}'.format(port), tkt_spool_dir]
        )
def _publish_ticket(tkt_file):
    """Publish the ``klist`` details of a ticket file to Zookeeper.

    Names starting with ``.`` are ignored silently; names not ending
    with one of ``realms`` are ignored with a log message. A failing
    ``klist`` call is logged as a warning.
    """
    if tkt_file.startswith('.'):
        return

    realm_matches = [tkt_file.endswith(realm) for realm in realms]
    if not any(realm_matches):
        _LOGGER.info('Ignore tkt_file: %s', tkt_file)
        return

    klist_cmd = ['klist', '-5', '-e', '-f', tkt_file]
    try:
        details = subproc.check_output(klist_cmd)
        node = z.path.tickets(os.path.basename(tkt_file), self.hostname)
        zkutils.put(self.zkclient, node, details, ephemeral=True)
    except subproc.CalledProcessError:
        _LOGGER.warning('Unable to get tickets details.')