Пример #1
0
 def setUp(self):
     """Prepare a scratch root with 'endpoints' and 'apps' directories."""
     self.root = tempfile.mkdtemp()

     # Compute the endpoints location once; it is both created on disk
     # and handed to the manager under test.
     endpoints_dir = os.path.join(self.root, 'endpoints')
     apps_dir = os.path.join(self.root, 'apps')
     os.mkdir(endpoints_dir)
     os.mkdir(apps_dir)

     self.manager = endpoints.EndpointsMgr(endpoints_dir)
Пример #2
0
    def test_get_spec(self):
        """Check that get_spec matches on any subset of proto/endpoint.
        """
        env = appenv.AppEnvironment(root=self.root)
        mgr = endpoints.EndpointsMgr(env.endpoints_dir)

        # pylint: disable=W0212
        # Nothing has been registered yet, so no spec can be found.
        self.assertIsNone(mgr.get_spec())

        spec_args = {
            'appname': 'appname##0000000001',
            'proto': 'tcp',
            'endpoint': 'nodeinfo',
            'real_port': 12345,
            'pid': 5213,
            'port': 8000,
            'owner': None,
        }
        mgr.create_spec(**spec_args)

        # Lookup by protocol alone finds the spec.
        self.assertIsNotNone(mgr.get_spec(proto='tcp'))

        # Lookup by protocol and lookup by endpoint name agree.
        by_proto = mgr.get_spec(proto='tcp')
        by_endpoint = mgr.get_spec(endpoint='nodeinfo')
        self.assertEqual(by_proto, by_endpoint)

        # Lookup by protocol and lookup by both criteria agree.
        by_proto = mgr.get_spec(proto='tcp')
        by_both = mgr.get_spec(proto='tcp', endpoint='nodeinfo')
        self.assertEqual(by_proto, by_both)
Пример #3
0
def create_endpoint_file(approot, port, appname, endpoint):
    """Register a TCP endpoint in Zookeeper and record it locally.

    The Zookeeper node is written as ``<hostname>:<port>``; afterwards any
    matching local endpoint entries are unlinked and a fresh spec, owned by
    the current process, is created through the endpoints manager.
    """
    hostport = '%s:%s' % (sysinfo.hostname(), port)
    zkclient = context.GLOBAL.zk.conn

    # The proid-level node has to be present before the endpoint is added.
    proid_path = z.path.endpoint_proid(appname)
    servers_acl = zkclient.make_servers_acl()
    _LOGGER.info('Ensuring %s exists with ACL %r', proid_path, servers_acl)
    zkutils.ensure_exists(zkclient, proid_path, acl=[servers_acl])

    node_path = z.path.endpoint(appname, 'tcp', endpoint)
    _LOGGER.info('Registering %s %s', node_path, hostport)

    # Need to delete/create endpoints for the discovery to pick it up in
    # case of master restart.
    zkutils.ensure_deleted(zkclient, node_path)
    time.sleep(5)
    zkutils.put(zkclient, node_path, hostport)

    # Mirror the registration on the local filesystem.
    pid = os.getpid()
    mgr = endpoints.EndpointsMgr(appenv.AppEnvironment(approot).endpoints_dir)
    mgr.unlink_all(appname=appname, endpoint=endpoint, proto='tcp')
    mgr.create_spec(
        appname=appname,
        endpoint=endpoint,
        proto='tcp',
        real_port=port,
        pid=pid,
        port=port,
        owner='/proc/{}'.format(pid),
    )
Пример #4
0
    def __init__(self, root):
        """Set up directory paths, managers and resource service clients.

        All directories are derived from ``self.root`` (or, for the services
        tombstones, from ``self.tombstones_dir``) using the class-level
        ``*_DIR`` constants. ``self.tombstones_dir`` and ``self.apps_dir``
        are not set here; they are expected to be provided by the base class
        initializer.

        :param root:
            Value forwarded to the base class initializer.
        """
        super(LinuxAppEnvironment, self).__init__(root)

        # Directory layout (each attribute is assigned exactly once).
        self.ctl_dir = os.path.join(self.root, self.CTL_DIR)
        self.endpoints_dir = os.path.join(self.root, self.ENDPOINTS_DIR)
        self.metrics_dir = os.path.join(self.root, self.METRICS_DIR)
        self.mounts_dir = os.path.join(self.root, self.MOUNTS_DIR)
        self.rules_dir = os.path.join(self.root, self.RULES_DIR)
        self.services_tombstone_dir = os.path.join(self.tombstones_dir,
                                                   self.SERVICES_DIR)
        self.spool_dir = os.path.join(self.root, self.SPOOL_DIR)
        self.svc_cgroup_dir = os.path.join(self.root, self.SVC_CGROUP_DIR)
        self.svc_localdisk_dir = os.path.join(self.root,
                                              self.SVC_LOCALDISK_DIR)
        self.svc_network_dir = os.path.join(self.root, self.SVC_NETWORK_DIR)
        self.svc_presence_dir = os.path.join(self.root, self.SVC_PRESENCE_DIR)

        # Managers built on top of the directories above.
        self.rules = rulefile.RuleMgr(self.rules_dir, self.apps_dir)
        self.endpoints = endpoints.EndpointsMgr(self.endpoints_dir)

        # Services
        self.svc_cgroup = services.ResourceService(
            service_dir=self.svc_cgroup_dir, impl='cgroup')
        self.svc_localdisk = services.ResourceService(
            service_dir=self.svc_localdisk_dir, impl='localdisk')
        self.svc_network = services.ResourceService(
            service_dir=self.svc_network_dir, impl='network')
        self.svc_presence = services.ResourceService(
            service_dir=self.svc_presence_dir, impl='presence')
Пример #5
0
    def server(approot, register, port, auth, modules, title, cors_origin):
        """Runs nodeinfo server.

        :param approot:
            Root passed to ``appenv.AppEnvironment`` to locate the local
            endpoints directory.
        :param register:
            When true, publish the ``nodeinfo`` endpoint in Zookeeper (as an
            ephemeral node) and in the local endpoints directory.
        :param port:
            TCP port to serve on; ``0`` means pick a free port.
        :param auth:
            Passed to ``rest.TcpRestServer`` as ``auth_type``.
        :param modules:
            API module names handed to ``api.init``; may be empty.
        :param title:
            API title; underscores are replaced with spaces.
        :param cors_origin:
            Passed through to ``api.init``.
        """
        if port == 0:
            # Let the OS choose a free port. The probe socket is always
            # closed, even when bind fails.
            # NOTE(review): the port is released before the REST server binds
            # it, so another process could grab it in between.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.bind(('0.0.0.0', 0))
                port = sock.getsockname()[1]
            finally:
                sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        if register:
            zkclient = context.GLOBAL.zk.conn
            zkclient.add_listener(zkutils.exit_on_lost)

            # The app name embeds the pid; the pattern matches registrations
            # left by any previous process on this host.
            appname = 'root.%s#%010d' % (hostname, os.getpid())
            app_pattern = 'root.%s#*' % (hostname)
            path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
            _LOGGER.info('register endpoint: %s %s', path, hostport)
            zkutils.create(zkclient, path, hostport,
                           acl=[zkclient.make_servers_acl()],
                           ephemeral=True)

            # TODO: remove "legacy" endpoint registration once conversion is
            #       complete.
            tm_env = appenv.AppEnvironment(approot)
            # TODO: need to figure out how to handle windows.
            assert os.name != 'nt'
            endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
            # Drop stale local entries before creating the current one.
            endpoints_mgr.unlink_all(
                app_pattern, endpoint='nodeinfo', proto='tcp'
            )
            endpoints_mgr.create_spec(
                appname=appname,
                endpoint='nodeinfo',
                proto='tcp',
                real_port=port,
                pid=os.getpid(),
                port=port,
                owner='/proc/{}'.format(os.getpid()),
            )

        _LOGGER.info('Starting nodeinfo server on port: %s', port)

        utils.drop_privileges()

        api_paths = []
        if modules:
            api_modules = {module: None for module in modules}
            api_paths = api.init(
                api_modules,
                title.replace('_', ' '),
                cors_origin
            )

        rest_server = rest.TcpRestServer(port, auth_type=auth,
                                         protect=api_paths)
        rest_server.run()
Пример #6
0
    def accept_cmd(tkt_spool_dir, approot, port, appname, endpoint, keytab):
        """Run ticket locker acceptor.

        Registers ``<hostname>:<port>`` as a TCP endpoint for ``appname`` in
        Zookeeper and in the local endpoints directory, then execs into
        ``tkt_recv_v2``.

        :param tkt_spool_dir:
            Directory passed to ``tkt_recv_v2`` via ``-d``.
        :param approot:
            Root passed to ``appenv.AppEnvironment``.
        :param port:
            TCP port to accept on; ``0`` means pick a free port.
        :param appname:
            Application name the endpoint is registered under.
        :param endpoint:
            Endpoint name.
        :param keytab:
            When set, passed to ``_construct_keytab`` before anything else.
        """
        if keytab:
            _construct_keytab(keytab)

        if port == 0:
            # Let the OS choose a free port. The probe socket is always
            # closed, even when bind fails.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.bind(('0.0.0.0', 0))
                port = sock.getsockname()[1]
            finally:
                sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        endpoint_proid_path = z.path.endpoint_proid(appname)
        zkclient = context.GLOBAL.zk.conn
        acl = zkclient.make_servers_acl()
        _LOGGER.info(
            'Ensuring %s exists with ACL %r',
            endpoint_proid_path,
            acl
        )
        zkutils.ensure_exists(
            zkclient,
            endpoint_proid_path,
            acl=[acl]
        )

        endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
        _LOGGER.info('Registering %s %s', endpoint_path, hostport)

        # Need to delete/create endpoints for the discovery to pick it up in
        # case of master restart.
        #
        # Unlike typical endpoint, we cannot make the node ephemeral as we
        # exec into tkt-recv.
        zkutils.ensure_deleted(zkclient, endpoint_path)
        time.sleep(5)
        zkutils.put(zkclient, endpoint_path, hostport)

        zkclient.stop()

        # TODO: this will publish information about the endpoint state
        #       under /discovery. Once discovery is refactored (if it will be)
        #       we can remove the "manual" zookeeper manipulation.
        tm_env = appenv.AppEnvironment(approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        endpoints_mgr.unlink_all(
            appname=appname,
            endpoint=endpoint,
            proto='tcp'
        )
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint=endpoint,
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner='/proc/{}'.format(os.getpid()),
        )

        subproc.safe_exec(['tkt_recv_v2',
                           '-p{}'.format(port),
                           '-d{}'.format(tkt_spool_dir)])
Пример #7
0
    def accept(tkt_spool_dir, approot, port, appname, endpoint, use_v2,
               keytab):
        """Run ticket locker acceptor.

        Registers ``<hostname>:<port>`` as a TCP endpoint for ``appname`` in
        Zookeeper and in the local endpoints directory, then execs into the
        ticket receiver.

        :param tkt_spool_dir:
            Spool directory handed to the ticket receiver.
        :param approot:
            Root passed to ``appenv.AppEnvironment``.
        :param port:
            TCP port to accept on; ``0`` means pick a free port.
        :param appname:
            Application name the endpoint is registered under.
        :param endpoint:
            Endpoint name.
        :param use_v2:
            When true exec ``tkt_recv_v2``, otherwise ``tkt_recv``.
        :param keytab:
            When set, passed to ``_construct_keytab`` before anything else.
        """
        if keytab:
            _construct_keytab(keytab)

        if port == 0:
            # Let the OS choose a free port. The probe socket is always
            # closed, even when bind fails.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.bind(('0.0.0.0', 0))
                port = sock.getsockname()[1]
            finally:
                sock.close()

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        endpoint_proid_path = z.path.endpoint_proid(appname)
        zkclient = context.GLOBAL.zk.conn
        acl = zkclient.make_servers_acl()
        _LOGGER.info(
            'Ensuring %s exists with ACL %r',
            endpoint_proid_path,
            acl
        )
        zkutils.ensure_exists(
            zkclient,
            endpoint_proid_path,
            acl=[acl]
        )

        endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)
        _LOGGER.info('Registering %s %s', endpoint_path, hostport)

        # Need to delete/create endpoints for the discovery to pick it up in
        # case of master restart.
        #
        # Unlike typical endpoint, we cannot make the node ephemeral as we
        # exec into tkt-recv.
        zkutils.ensure_deleted(zkclient, endpoint_path)
        time.sleep(5)
        zkutils.put(zkclient, endpoint_path, hostport)

        zkclient.stop()

        tm_env = appenv.AppEnvironment(approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
        endpoints_mgr.unlink_all(
            appname=appname,
            endpoint=endpoint,
            proto='tcp'
        )
        endpoints_mgr.create_spec(
            appname=appname,
            endpoint=endpoint,
            proto='tcp',
            real_port=port,
            pid=os.getpid(),
            port=port,
            owner='/proc/{}'.format(os.getpid()),
        )

        # Exec into tickets acceptor. If race condition will not allow it to
        # bind to the provided port, it will exit and registration will
        # happen again.
        if use_v2:
            subproc.safe_exec(['tkt_recv_v2',
                               '-p{}'.format(port),
                               '-d{}'.format(tkt_spool_dir)])
        else:
            subproc.safe_exec(['tkt_recv',
                               'tcp://*:{}'.format(port),
                               tkt_spool_dir])
Пример #8
0
    def server(approot, register, port, auth, modules, config, title,
               cors_origin, rate_limit_global, rate_limit_module,
               rate_limit_by):
        """Runs nodeinfo server.

        :param approot:
            Root passed to ``appenv.AppEnvironment`` to locate the local
            endpoints directory.
        :param register:
            When true, publish the ``nodeinfo`` endpoint in Zookeeper (as an
            ephemeral node) and in the local endpoints directory.
        :param port:
            Port requested from ``rest.TcpRestServer``; the port actually
            used is read back from the server object.
        :param auth:
            Passed to ``rest.TcpRestServer`` as ``auth_type``.
        :param modules:
            API module names handed to ``api.init``; may be empty.
        :param config:
            Iterable of ``(module, stream)`` pairs; each stream is parsed as
            YAML and closed. Only consulted when ``modules`` is non-empty.
        :param title:
            API title; underscores are replaced with spaces.
        :param cors_origin:
            Passed through to ``api.init``.
        :param rate_limit_global:
        :param rate_limit_module:
        :param rate_limit_by:
            Forwarded together to ``_get_rate_limit``.
        :raises click.UsageError:
            If a config entry names a module that is not in ``modules``.
        """
        rate_limit = _get_rate_limit(
            rate_limit_global, rate_limit_module, rate_limit_by
        )

        rest_server = rest.TcpRestServer(port, auth_type=auth,
                                         rate_limit=rate_limit)
        port = rest_server.port

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        if register:
            zkclient = context.GLOBAL.zk.conn
            zkclient.add_listener(zkutils.exit_on_lost)

            # The app name embeds the pid; the pattern matches registrations
            # left by any previous process on this host.
            appname = 'root.%s#%010d' % (hostname, os.getpid())
            app_pattern = 'root.%s#*' % (hostname)
            path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
            _LOGGER.info('register endpoint: %s %s', path, hostport)
            zkutils.create(zkclient, path, hostport,
                           acl=[zkclient.make_servers_acl()],
                           ephemeral=True)

            # TODO: remove "legacy" endpoint registration once conversion is
            #       complete.
            tm_env = appenv.AppEnvironment(approot)

            endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
            endpoints_mgr.unlink_all(
                app_pattern, endpoint='nodeinfo', proto='tcp'
            )

            # On Linux endpoint for nodeinfo is a symlink pointing to
            # /proc/{pid}, on Windows it's just a regular file
            owner = '/proc/{}'.format(os.getpid()) if os.name == 'posix' \
                else None

            endpoints_mgr.create_spec(
                appname=appname,
                endpoint='nodeinfo',
                proto='tcp',
                real_port=port,
                pid=os.getpid(),
                port=port,
                owner=owner,
            )

        _LOGGER.info('Starting nodeinfo server on port: %s', port)

        utils.drop_privileges()

        if modules:
            api_modules = {module: None for module in modules}
            for module, cfg in config:
                if module not in api_modules:
                    raise click.UsageError(
                        'Orphan config: %s, not in: %r' % (module, api_modules)
                    )
                try:
                    # NOTE(review): yaml.load without an explicit Loader can
                    # construct arbitrary Python objects and is rejected by
                    # newer PyYAML releases; consider yaml.safe_load if the
                    # configs never rely on Python-specific tags.
                    api_modules[module] = yaml.load(stream=cfg)
                finally:
                    # Close the stream even when parsing fails.
                    cfg.close()

            rest_server.protect = api.init(
                api_modules,
                title.replace('_', ' '),
                cors_origin
            )

        rest_server.run()
Пример #9
0
    def metrics(step, approot):
        """Collect node and container metrics.

        Polls the local cgroup REST api (located through the ``nodeinfo``
        tcp endpoint spec) roughly every ``step`` seconds and feeds the
        results into rrd files under the metrics directory. The loop has no
        exit condition: this function only ends when an exception propagates
        or the process is terminated.

        :param step:
            Polling interval in seconds; also passed to the rrd update
            helpers.
        :param approot:
            Root passed to ``appenv.AppEnvironment``.
        """

        tm_env = appenv.AppEnvironment(root=approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)

        app_metrics_dir = os.path.join(tm_env.metrics_dir, 'apps')
        core_metrics_dir = os.path.join(tm_env.metrics_dir, 'core')
        fs.mkdir_safe(app_metrics_dir)
        fs.mkdir_safe(core_metrics_dir)

        # Initiate the list for monitored applications from the rrd files
        # already present (file name minus the '.rrd' suffix).
        monitored_apps = set(
            os.path.basename(metric_name)[:-len('.rrd')]
            for metric_name in glob.glob('%s/*' % app_metrics_dir)
            if metric_name.endswith('.rrd'))

        sys_maj_min = '{}:{}'.format(*fs_linux.maj_min_from_path(approot))
        _LOGGER.info('Device sys maj:min = %s for approot: %s', sys_maj_min,
                     approot)

        _LOGGER.info('Loading rrd client')
        rrd_loader = RRDClientLoader()
        second_used = 0

        while True:
            # Sleep only for what is left of the interval after the previous
            # collection pass.
            if step > second_used:
                time.sleep(step - second_used)

            spec = endpoints_mgr.get_spec(proto='tcp', endpoint='nodeinfo')
            if spec is None:
                # No endpoint yet: wait a full interval before retrying.
                second_used = 0
                _LOGGER.warning('Cgroup REST api port not found.')
                continue

            # appname = 'root.{hostname}#{pid}'
            appname = spec[0]
            host = appname.split('#')[0][len('root.'):]
            port = int(spec[-1])
            remote = 'http://{0}:{1}'.format(host, port)
            _LOGGER.info('remote cgroup API address: %s', remote)

            starttime_sec = time.time()
            count = 0

            # aggregated cgroup values of `treadmill.core` and `treadmill.apps`
            url = '/cgroup/treadmill/*/'
            data = restclient.get(remote, url, auth=None).json()

            url = '/cgroup/treadmill'
            data['treadmill'] = restclient.get(remote, url, auth=None).json()
            count += _update_core_rrds(data, core_metrics_dir,
                                       rrd_loader.client, step, sys_maj_min)

            url = '/cgroup/treadmill/core/*/?detail=true'
            data = restclient.get(remote, url, auth=None).json()
            count += _update_service_rrds(data, core_metrics_dir,
                                          rrd_loader.client, step, sys_maj_min)

            url = '/cgroup/treadmill/apps/*/?detail=true'
            data = restclient.get(remote, url, auth=None).json()
            count += _update_app_rrds(data, app_metrics_dir, rrd_loader.client,
                                      step, tm_env)

            # Removed metrics for apps that are not present anymore
            # (`data` here is the apps response fetched just above).
            seen_apps = set(data)
            for app_unique_name in monitored_apps - seen_apps:
                rrdfile = os.path.join(app_metrics_dir,
                                       '{app}.rrd'.format(app=app_unique_name))
                _LOGGER.info('removing %r', rrdfile)
                rrd.finish(rrd_loader.client, rrdfile)

            monitored_apps = seen_apps

            second_used = time.time() - starttime_sec
            _LOGGER.info('Got %d cgroups metrics in %.3f seconds', count,
                         second_used)