def setUp(self):
    """Build a scratch install tree (endpoints/ and apps/) and a manager."""
    self.root = tempfile.mkdtemp()
    for subdir in ('endpoints', 'apps'):
        os.mkdir(os.path.join(self.root, subdir))
    self.manager = endpoints.EndpointsMgr(
        os.path.join(self.root, 'endpoints')
    )
def test_get_spec(self):
    """Test get endpoint spec with partial pattern match.
    """
    tm_env = appenv.AppEnvironment(root=self.root)
    mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)

    # pylint: disable=W0212

    # Nothing registered yet: no spec is found.
    self.assertIsNone(mgr.get_spec())

    mgr.create_spec(
        appname='appname##0000000001',
        proto='tcp',
        endpoint='nodeinfo',
        real_port=12345,
        pid=5213,
        port=8000,
        owner=None,
    )

    # Any partial pattern that matches must return the same single spec.
    self.assertIsNotNone(mgr.get_spec(proto='tcp'))
    self.assertEqual(
        mgr.get_spec(proto='tcp'),
        mgr.get_spec(endpoint='nodeinfo'),
    )
    self.assertEqual(
        mgr.get_spec(proto='tcp'),
        mgr.get_spec(proto='tcp', endpoint='nodeinfo'),
    )
def create_endpoint_file(approot, port, appname, endpoint):
    """Create and link local endpoint file.

    Registers ``hostname:port`` for the app's endpoint in Zookeeper (after
    ensuring the proid node exists with the servers ACL), then mirrors the
    registration in the node-local endpoints directory.

    :param approot: Treadmill node install root directory.
    :param port: TCP port the endpoint listens on.
    :param appname: Fully qualified application name.
    :param endpoint: Endpoint name to register.
    """
    hostport = '%s:%s' % (sysinfo.hostname(), port)
    # Fixed misspelled local variable (was: zkclinet).
    zkclient = context.GLOBAL.zk.conn

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = zkclient.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r', endpoint_proid_path, acl
    )
    zkutils.ensure_exists(zkclient, endpoint_proid_path, acl=[acl])

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    zkutils.ensure_deleted(zkclient, endpoint_path)
    time.sleep(5)
    zkutils.put(zkclient, endpoint_path, hostport)

    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    endpoints_mgr.unlink_all(appname=appname, endpoint=endpoint, proto='tcp')
    # Owner symlink points at our own /proc/{pid} so the spec disappears
    # with the process.
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )
def __init__(self, root):
    """Initialize Linux node environment directories and resource services.

    Removed duplicated re-assignments of ``rules_dir``,
    ``services_tombstone_dir`` and ``ctl_dir`` (each was set twice with
    identical values).

    :param root: Treadmill node install root directory.
    """
    super(LinuxAppEnvironment, self).__init__(root)

    # Node directory layout under the install root.
    self.ctl_dir = os.path.join(self.root, self.CTL_DIR)
    self.metrics_dir = os.path.join(self.root, self.METRICS_DIR)
    self.mounts_dir = os.path.join(self.root, self.MOUNTS_DIR)
    self.rules_dir = os.path.join(self.root, self.RULES_DIR)
    self.services_tombstone_dir = os.path.join(self.tombstones_dir,
                                               self.SERVICES_DIR)
    self.spool_dir = os.path.join(self.root, self.SPOOL_DIR)
    self.svc_cgroup_dir = os.path.join(self.root, self.SVC_CGROUP_DIR)
    self.svc_localdisk_dir = os.path.join(self.root,
                                          self.SVC_LOCALDISK_DIR)
    self.svc_network_dir = os.path.join(self.root, self.SVC_NETWORK_DIR)
    self.svc_presence_dir = os.path.join(self.root, self.SVC_PRESENCE_DIR)
    self.endpoints_dir = os.path.join(self.root, self.ENDPOINTS_DIR)

    self.rules = rulefile.RuleMgr(self.rules_dir, self.apps_dir)
    self.endpoints = endpoints.EndpointsMgr(self.endpoints_dir)

    # Services
    self.svc_cgroup = services.ResourceService(
        service_dir=self.svc_cgroup_dir,
        impl='cgroup')
    self.svc_localdisk = services.ResourceService(
        service_dir=self.svc_localdisk_dir,
        impl='localdisk')
    self.svc_network = services.ResourceService(
        service_dir=self.svc_network_dir,
        impl='network')
    self.svc_presence = services.ResourceService(
        service_dir=self.svc_presence_dir,
        impl='presence')
def server(approot, register, port, auth, modules, title, cors_origin):
    """Runs nodeinfo server."""
    if port == 0:
        # Ask the OS for a free ephemeral port, then release it; the REST
        # server re-binds it below (small race window is accepted).
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('0.0.0.0', 0))
        port = sock.getsockname()[1]
        sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    if register:
        zkclient = context.GLOBAL.zk.conn
        zkclient.add_listener(zkutils.exit_on_lost)

        appname = 'root.%s#%010d' % (hostname, os.getpid())
        app_pattern = 'root.%s#*' % (hostname)
        path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
        _LOGGER.info('register endpoint: %s %s', path, hostport)
        zkutils.create(zkclient, path, hostport,
                       acl=[zkclient.make_servers_acl()],
                       ephemeral=True)

        # TODO: remove "legacy" endpoint registration once conversion is
        #       complete.
        tm_env = appenv.AppEnvironment(approot)
        # TODO: need to figure out how to handle windows.
        assert os.name != 'nt'
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        endpoints_mgr.unlink_all(
            app_pattern, endpoint='nodeinfo', proto='tcp'
        )
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint='nodeinfo',
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner='/proc/{}'.format(os.getpid()),
        )

    _LOGGER.info('Starting nodeinfo server on port: %s', port)

    utils.drop_privileges()

    api_paths = []
    if modules:
        api_modules = {module: None for module in modules}
        api_paths = api.init(
            api_modules, title.replace('_', ' '), cors_origin
        )

    rest_server = rest.TcpRestServer(port, auth_type=auth,
                                     protect=api_paths)
    rest_server.run()
def accept_cmd(tkt_spool_dir, approot, port, appname, endpoint, keytab):
    """Run ticket locker acceptor."""
    if keytab:
        _construct_keytab(keytab)

    if port == 0:
        # Grab a free ephemeral port from the OS and release it for reuse.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('0.0.0.0', 0))
        port = sock.getsockname()[1]
        sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = context.GLOBAL.zk.conn.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r', endpoint_proid_path, acl
    )
    zkutils.ensure_exists(
        context.GLOBAL.zk.conn, endpoint_proid_path, acl=[acl]
    )

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike a typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    # TODO: this will publish information about the endpoint state
    #       under /discovery. Once discovery is refactored (if it will be)
    #       we can remove the "manual" zookeeper manipulation.
    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    endpoints_mgr.unlink_all(
        appname=appname, endpoint=endpoint, proto='tcp'
    )
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )

    subproc.safe_exec(
        ['tkt_recv_v2', '-p{}'.format(port), '-d{}'.format(tkt_spool_dir)]
    )
def accept(tkt_spool_dir, approot, port, appname, endpoint, use_v2, keytab):
    """Run ticket locker acceptor."""
    if keytab:
        _construct_keytab(keytab)

    if port == 0:
        # Grab a free ephemeral port from the OS and release it for reuse.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('0.0.0.0', 0))
        port = sock.getsockname()[1]
        sock.close()

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    endpoint_proid_path = z.path.endpoint_proid(appname)
    acl = context.GLOBAL.zk.conn.make_servers_acl()
    _LOGGER.info(
        'Ensuring %s exists with ACL %r', endpoint_proid_path, acl
    )
    zkutils.ensure_exists(
        context.GLOBAL.zk.conn, endpoint_proid_path, acl=[acl]
    )

    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', endpoint_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    #
    # Unlike a typical endpoint, we cannot make the node ephemeral as we
    # exec into tkt-recv.
    zkutils.ensure_deleted(context.GLOBAL.zk.conn, endpoint_path)
    time.sleep(5)
    zkutils.put(context.GLOBAL.zk.conn, endpoint_path, hostport)

    context.GLOBAL.zk.conn.stop()

    tm_env = appenv.AppEnvironment(approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
    endpoints_mgr.unlink_all(
        appname=appname, endpoint=endpoint, proto='tcp'
    )
    endpoints_mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=os.getpid(),
        port=port,
        owner='/proc/{}'.format(os.getpid()),
    )

    # Exec into tickets acceptor. If race condition will not allow it to
    # bind to the provided port, it will exit and registration will
    # happen again.
    if use_v2:
        subproc.safe_exec(
            ['tkt_recv_v2',
             '-p{}'.format(port),
             '-d{}'.format(tkt_spool_dir)]
        )
    else:
        subproc.safe_exec(
            ['tkt_recv', 'tcp://*:{}'.format(port), tkt_spool_dir]
        )
def server(approot, register, port, auth, modules, config, title,
           cors_origin, rate_limit_global, rate_limit_module, rate_limit_by):
    """Runs nodeinfo server."""
    rate_limit = _get_rate_limit(
        rate_limit_global, rate_limit_module, rate_limit_by
    )
    # The REST server binds immediately; read back the actual port in case
    # an ephemeral one (0) was requested.
    rest_server = rest.TcpRestServer(port, auth_type=auth,
                                     rate_limit=rate_limit)
    port = rest_server.port

    hostname = sysinfo.hostname()
    hostport = '%s:%s' % (hostname, port)

    if register:
        zkclient = context.GLOBAL.zk.conn
        zkclient.add_listener(zkutils.exit_on_lost)

        appname = 'root.%s#%010d' % (hostname, os.getpid())
        app_pattern = 'root.%s#*' % (hostname)
        path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
        _LOGGER.info('register endpoint: %s %s', path, hostport)
        zkutils.create(zkclient, path, hostport,
                       acl=[zkclient.make_servers_acl()],
                       ephemeral=True)

        # TODO: remove "legacy" endpoint registration once conversion is
        #       complete.
        tm_env = appenv.AppEnvironment(approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        endpoints_mgr.unlink_all(
            app_pattern, endpoint='nodeinfo', proto='tcp'
        )
        # On Linux endpoint for nodeinfo is a symlink pointing to
        # /proc/{pid}, on Windows it's just a regular file
        owner = '/proc/{}'.format(os.getpid()) if os.name == 'posix' \
            else None
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint='nodeinfo',
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner=owner,
        )

    _LOGGER.info('Starting nodeinfo server on port: %s', port)

    utils.drop_privileges()

    if modules:
        api_modules = {module: None for module in modules}
        for module, cfg in config:
            if module not in api_modules:
                raise click.UsageError(
                    'Orphan config: %s, not in: %r' % (module, api_modules)
                )
            # NOTE(review): yaml.load without an explicit Loader is unsafe
            # on untrusted input — presumably cfg files are operator
            # provided; confirm, or consider yaml.safe_load.
            api_modules[module] = yaml.load(stream=cfg)
            cfg.close()
        rest_server.protect = api.init(
            api_modules, title.replace('_', ' '), cors_origin
        )

    rest_server.run()
def metrics(step, approot):
    """Collect node and container metrics."""
    tm_env = appenv.AppEnvironment(root=approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)

    app_metrics_dir = os.path.join(tm_env.metrics_dir, 'apps')
    core_metrics_dir = os.path.join(tm_env.metrics_dir, 'core')
    fs.mkdir_safe(app_metrics_dir)
    fs.mkdir_safe(core_metrics_dir)

    # Seed the set of monitored applications from existing .rrd files.
    monitored_apps = set(
        os.path.basename(rrd_name)[:-len('.rrd')]
        for rrd_name in glob.glob('%s/*' % app_metrics_dir)
        if rrd_name.endswith('.rrd')
    )

    sys_maj_min = '{}:{}'.format(*fs_linux.maj_min_from_path(approot))
    _LOGGER.info('Device sys maj:min = %s for approot: %s',
                 sys_maj_min, approot)

    _LOGGER.info('Loading rrd client')
    rrd_loader = RRDClientLoader()

    elapsed = 0
    while True:
        # Sleep off whatever is left of the collection interval.
        if step > elapsed:
            time.sleep(step - elapsed)

        spec = endpoints_mgr.get_spec(proto='tcp', endpoint='nodeinfo')
        if spec is None:
            elapsed = 0
            _LOGGER.warning('Cgroup REST api port not found.')
            continue

        # appname = 'root.{hostname}#{pid}'
        appname = spec[0]
        host = appname.split('#')[0][len('root.'):]
        port = int(spec[-1])

        remote = 'http://{0}:{1}'.format(host, port)
        _LOGGER.info('remote cgroup API address: %s', remote)

        cycle_start = time.time()
        count = 0

        # aggregated cgroup values of `treadmill.core` and `treadmill.apps`
        url = '/cgroup/treadmill/*/'
        data = restclient.get(remote, url, auth=None).json()
        url = '/cgroup/treadmill'
        data['treadmill'] = restclient.get(remote, url, auth=None).json()
        count += _update_core_rrds(data, core_metrics_dir,
                                   rrd_loader.client, step, sys_maj_min)

        url = '/cgroup/treadmill/core/*/?detail=true'
        data = restclient.get(remote, url, auth=None).json()
        count += _update_service_rrds(data, core_metrics_dir,
                                      rrd_loader.client, step, sys_maj_min)

        url = '/cgroup/treadmill/apps/*/?detail=true'
        data = restclient.get(remote, url, auth=None).json()
        count += _update_app_rrds(data, app_metrics_dir,
                                  rrd_loader.client, step, tm_env)

        # Remove metrics for apps that are not present anymore.
        seen_apps = set(data)
        for app_unique_name in monitored_apps - seen_apps:
            rrdfile = os.path.join(
                app_metrics_dir,
                '{app}.rrd'.format(app=app_unique_name)
            )
            _LOGGER.info('removing %r', rrdfile)
            rrd.finish(rrd_loader.client, rrdfile)
        monitored_apps = seen_apps

        elapsed = time.time() - cycle_start
        _LOGGER.info('Got %d cgroups metrics in %.3f seconds',
                     count, elapsed)

    # Graceful shutdown. NOTE(review): the loop above never breaks, so
    # this statement appears unreachable — confirm intent.
    _LOGGER.info('service shutdown.')