Exemplo n.º 1
0
    def __init__(self, approot, interval):
        """Initialise the metrics cache and start reading.

        :param approot:
            Application root path. It is used to build the app environment,
            is passed to ``_sys_svcs`` and is resolved to the device that
            backs it.
        :param interval:
            Refresh interval. A value ``<= 0`` performs a single
            ``_read()``; any other value hands control to ``_loop()``.
        """
        self.cache = dict(treadmill={}, core={}, app={})
        self._interval = interval

        self._tm_env = appenv.AppEnvironment(root=approot)
        self._sys_svcs = _sys_svcs(approot)

        # TODO: sys_maj_min will be used changing treadmill.metrics.app_metrics
        dev_numbers = fs_linux.maj_min_from_path(approot)
        self._sys_maj_min = '{}:{}'.format(*dev_numbers)
        self._sys_block_dev = fs_linux.maj_min_to_blk(
            *fs_linux.maj_min_from_path(approot))

        # A non-positive interval means "read once and stop".
        if interval <= 0:
            self._read()
            return

        self._loop()
Exemplo n.º 2
0
    def benchmark(benchmark_publish_file, vg_name, underlying_device_name,
                  underlying_image_path, benchmark_volume, rw_type, job_number,
                  thread_number, iops_block_size, bps_block_size, max_seconds):
        """Benchmark node IO performance.

        Two benchmark passes are run, one with ``iops_block_size`` and one
        with ``bps_block_size``. The resulting read/write bps and iops
        figures are written to ``benchmark_publish_file`` keyed by the UUID
        of the underlying block device.

        :param benchmark_publish_file:
            Destination handed to ``diskbenchmark.write``.
        :param vg_name:
            Volume group benchmarked when ``underlying_device_name`` is set.
        :param underlying_device_name:
            Physical device backing the volume group. When not ``None`` the
            volume group is benchmarked directly.
        :param underlying_image_path:
            Location of the image file backing the volume group. Used only
            when ``underlying_device_name`` is ``None``; the benchmark then
            runs in a ``benchmark`` sub-directory of this path.
        :param benchmark_volume, rw_type, job_number, thread_number,
               max_seconds:
            Forwarded unchanged to the ``diskbenchmark`` helpers.
        :param iops_block_size:
            Block size used for the IOPS pass.
        :param bps_block_size:
            Block size used for the throughput (bps) pass.

        If neither device name nor image path is given an error is logged
        and nothing is written. ``subproc.CommandAliasError`` is caught and
        logged rather than propagated.
        """
        try:
            if underlying_device_name is not None:
                # LVM is based on physical device,
                # benchmark VG directly
                underlying_device_uuid = fs_linux.blk_uuid(
                    underlying_device_name)
                max_iops_result = diskbenchmark.benchmark_vg(
                    vg_name, benchmark_volume, rw_type, job_number,
                    thread_number, iops_block_size, max_seconds)
                max_bps_result = diskbenchmark.benchmark_vg(
                    vg_name, benchmark_volume, rw_type, job_number,
                    thread_number, bps_block_size, max_seconds)
            elif underlying_image_path is not None:
                # LVM is based on loop device,
                # benchmark underlying physical device of image file
                underlying_device_uuid = fs_linux.blk_uuid(
                    fs_linux.maj_min_to_blk(
                        *fs_linux.maj_min_from_path(underlying_image_path)))
                benchmark_path = os.path.join(underlying_image_path,
                                              'benchmark')
                try:
                    max_iops_result = diskbenchmark.benchmark(
                        benchmark_path, benchmark_volume, rw_type, job_number,
                        thread_number, iops_block_size, max_seconds)
                    max_bps_result = diskbenchmark.benchmark(
                        benchmark_path, benchmark_volume, rw_type, job_number,
                        thread_number, bps_block_size, max_seconds)
                finally:
                    # Always drop the scratch directory, including when a
                    # benchmark run raises, so failed runs do not leave
                    # benchmark data behind next to the image.
                    if os.path.isdir(benchmark_path):
                        shutil.rmtree(benchmark_path)
            else:
                _LOGGER.error('No underlying device, please specify '
                              '--underlying-device-name/'
                              '--underlying-image-path')
                return

            diskbenchmark.write(
                benchmark_publish_file, {
                    underlying_device_uuid: {
                        'read_bps': max_bps_result['read_bps'],
                        'write_bps': max_bps_result['write_bps'],
                        'read_iops': max_iops_result['read_iops'],
                        'write_iops': max_iops_result['write_iops']
                    }
                })
        except subproc.CommandAliasError:
            _LOGGER.error(_ALIAS_ERROR_MESSAGE)
Exemplo n.º 3
0
    def localdisk(img_location, img_size, block_dev, vg_name,
                  block_dev_configuration, block_dev_read_bps,
                  block_dev_write_bps, block_dev_read_iops,
                  block_dev_write_iops, default_read_bps, default_write_bps,
                  default_read_iops, default_write_iops):
        """Run the localdisk resource service.

        The block device limits handed to the service come from exactly one
        of two sources:

        * ``block_dev_configuration`` (a benchmark file) when it is given
          and none of the four ``block_dev_*`` limits is;
        * the four ``block_dev_*`` limits when all of them are given and no
          benchmark file is.

        Any other combination, or a benchmark file that cannot be used,
        logs an error and falls back to built-in values.

        :param img_location:
            Where the image file lives; defaults to the root directory from
            ``local_ctx`` when ``None``.
        :param img_size:
            Size passed to ``localdiskutils.init_block_dev``.
        :param block_dev:
            Existing block device. When ``None`` one is initialised from an
            image under ``img_location``.
        :param vg_name:
            Volume group name forwarded to the service.
        :param default_read_bps, default_write_bps, default_read_iops,
               default_write_iops:
            Forwarded unchanged to the service.
        """
        root_dir = local_ctx['root-dir']
        watchdogs_dir = local_ctx['watchdogs-dir']

        service = services.ResourceService(
            service_dir=os.path.join(root_dir, 'localdisk_svc'),
            impl='localdisk',
        )

        explicit_limits = (
            block_dev_read_bps,
            block_dev_write_bps,
            block_dev_read_iops,
            block_dev_write_iops,
        )
        if img_location is None:
            img_location = root_dir

        # Resolve the UUID of the device that ultimately holds the data and
        # make sure there is a block device to work with.
        if block_dev is None:
            underlying_device_uuid = fs_linux.blk_uuid(
                fs_linux.maj_min_to_blk(
                    *fs_linux.maj_min_from_path(img_location)))
            block_dev = localdiskutils.init_block_dev(
                localdiskutils.TREADMILL_IMG, img_location, img_size)
        else:
            underlying_device_uuid = fs_linux.blk_uuid(block_dev)

        read_bps = write_bps = read_iops = write_iops = None
        have_config_file = block_dev_configuration is not None

        if have_config_file and all(
                limit is None for limit in explicit_limits):
            # Limits come from the benchmark file entry for this device.
            try:
                measured = diskbenchmark.read(
                    block_dev_configuration)[underlying_device_uuid]
                read_bps = measured['read_bps']
                write_bps = measured['write_bps']
                read_iops = int(measured['read_iops'])
                write_iops = int(measured['write_iops'])
            except IOError:
                _LOGGER.error('No benchmark found : %s',
                              block_dev_configuration)
            except (KeyError, ValueError):
                _LOGGER.error('Incorrect disk benchmark for device %s in %s',
                              underlying_device_uuid, block_dev_configuration)
        elif not have_config_file and all(
                limit is not None for limit in explicit_limits):
            # Limits were all given explicitly.
            (read_bps, write_bps,
             read_iops, write_iops) = explicit_limits

        # Anything still unset means the configuration was unusable; fall
        # back to the built-in values for all four limits.
        if None in (read_bps, write_bps, read_iops, write_iops):
            _LOGGER.error('Bad block dev configuration')
            read_bps = write_bps = '200M'
            read_iops = write_iops = 3000

        service.run(
            watchdogs_dir=os.path.join(root_dir, watchdogs_dir),
            block_dev=block_dev,
            vg_name=vg_name,
            read_bps=read_bps,
            write_bps=write_bps,
            read_iops=read_iops,
            write_iops=write_iops,
            default_read_bps=default_read_bps,
            default_write_bps=default_write_bps,
            default_read_iops=default_read_iops,
            default_write_iops=default_write_iops,
        )
Exemplo n.º 4
0
    def metrics(step, approot):
        """Collect node and container metrics.

        Runs forever: roughly every ``step`` seconds it looks up the
        ``nodeinfo`` tcp endpoint, queries the cgroup REST API behind it and
        updates the core, service and app RRD files. RRD files of apps that
        are no longer reported are finished.

        :param step:
            Target period of one collection cycle, in seconds. Also passed
            to the RRD update helpers.
        :param approot:
            Application root used to build the app environment and to
            resolve the ``major:minor`` device numbers.
        """
        tm_env = appenv.AppEnvironment(root=approot)
        endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)

        app_metrics_dir = os.path.join(tm_env.metrics_dir, 'apps')
        core_metrics_dir = os.path.join(tm_env.metrics_dir, 'core')
        for metrics_dir in (app_metrics_dir, core_metrics_dir):
            fs.mkdir_safe(metrics_dir)

        # Seed the monitored set from the RRD files already on disk.
        rrd_suffix = '.rrd'
        monitored_apps = set()
        for rrd_path in glob.glob('%s/*' % app_metrics_dir):
            if rrd_path.endswith(rrd_suffix):
                monitored_apps.add(
                    os.path.basename(rrd_path)[:-len(rrd_suffix)])

        sys_maj_min = '{}:{}'.format(*fs_linux.maj_min_from_path(approot))
        _LOGGER.info('Device sys maj:min = %s for approot: %s', sys_maj_min,
                     approot)

        _LOGGER.info('Loading rrd client')
        rrd_loader = RRDClientLoader()
        elapsed = 0

        while True:
            # Sleep only for what is left of the period after the last cycle.
            if step > elapsed:
                time.sleep(step - elapsed)

            spec = endpoints_mgr.get_spec(proto='tcp', endpoint='nodeinfo')
            if spec is None:
                elapsed = 0
                _LOGGER.warning('Cgroup REST api port not found.')
                continue

            # spec[0] is the app name: 'root.{hostname}#{pid}'
            host = spec[0].split('#')[0][len('root.'):]
            remote = 'http://{0}:{1}'.format(host, int(spec[-1]))
            _LOGGER.info('remote cgroup API address: %s', remote)

            cycle_start = time.time()

            # aggregated cgroup values of `treadmill.core` and `treadmill.apps`
            data = restclient.get(
                remote, '/cgroup/treadmill/*/', auth=None).json()
            data['treadmill'] = restclient.get(
                remote, '/cgroup/treadmill', auth=None).json()
            core_count = _update_core_rrds(
                data, core_metrics_dir, rrd_loader.client, step, sys_maj_min)

            data = restclient.get(
                remote, '/cgroup/treadmill/core/*/?detail=true',
                auth=None).json()
            service_count = _update_service_rrds(
                data, core_metrics_dir, rrd_loader.client, step, sys_maj_min)

            data = restclient.get(
                remote, '/cgroup/treadmill/apps/*/?detail=true',
                auth=None).json()
            app_count = _update_app_rrds(
                data, app_metrics_dir, rrd_loader.client, step, tm_env)

            count = 0 + core_count + service_count + app_count

            # Finish the metrics of apps that are not present anymore.
            seen_apps = set(data)
            for gone_app in monitored_apps - seen_apps:
                rrdfile = os.path.join(
                    app_metrics_dir, '{app}.rrd'.format(app=gone_app))
                _LOGGER.info('removing %r', rrdfile)
                rrd.finish(rrd_loader.client, rrdfile)

            monitored_apps = seen_apps

            elapsed = time.time() - cycle_start
            _LOGGER.info('Got %d cgroups metrics in %.3f seconds', count,
                         elapsed)

        # Graceful shutdown. NOTE: the loop above has no break, so this line
        # is not reached as the code stands.
        _LOGGER.info('service shutdown.')
Exemplo n.º 5
0
 def _sys_block_dev(self):
     """Return the block device backing the app root, computed once.

     The value is derived from the ``major:minor`` numbers of
     ``self._approot`` on first use and memoised in ``self._initialized``.
     """
     memo = self._initialized
     if '_sys_block_dev' in memo:
         return memo['_sys_block_dev']

     dev_numbers = fs_linux.maj_min_from_path(self._approot)
     memo['_sys_block_dev'] = fs_linux.maj_min_to_blk(*dev_numbers)
     return memo['_sys_block_dev']
Exemplo n.º 6
0
 def _sys_maj_min(self):
     """Return the app root's device numbers as ``'major:minor'``.

     The string is built on first use from ``self._approot`` and memoised
     in ``self._initialized``.
     """
     # TODO: sys_maj_min will be used changing treadmill.metrics.app_metrics
     memo = self._initialized
     if '_sys_maj_min' in memo:
         return memo['_sys_maj_min']

     dev_numbers = fs_linux.maj_min_from_path(self._approot)
     memo['_sys_maj_min'] = '{}:{}'.format(*dev_numbers)
     return memo['_sys_maj_min']
Exemplo n.º 7
0
    def metrics(step, approot, api_socket):
        """Collect node and container metrics.

        Runs forever: roughly every ``step`` seconds it fetches
        ``/cgroup/_bulk`` from the cgroup API and updates the core, service
        and app RRD files from the ``treadmill``, ``core`` and ``app``
        sections of the reply. RRD files of apps that are no longer
        reported are finished.

        :param step:
            Target period of one collection cycle, in seconds. Also passed
            to the RRD update helpers.
        :param approot:
            Application root used to build the app environment and to
            resolve the ``major:minor`` device numbers.
        :param api_socket:
            Socket path of the cgroup API; it is URL-quoted and used as the
            host part of an ``http+unix://`` address.
        """
        remote = 'http+unix://{}'.format(urllib_parse.quote_plus(api_socket))
        _LOGGER.info('remote cgroup API address %s', remote)

        tm_env = appenv.AppEnvironment(root=approot)

        app_metrics_dir = os.path.join(tm_env.metrics_dir, 'apps')
        core_metrics_dir = os.path.join(tm_env.metrics_dir, 'core')
        for metrics_dir in (app_metrics_dir, core_metrics_dir):
            fs.mkdir_safe(metrics_dir)

        # Seed the monitored set from the RRD files already on disk.
        rrd_suffix = '.rrd'
        monitored_apps = set()
        for rrd_path in glob.glob('%s/*' % app_metrics_dir):
            if rrd_path.endswith(rrd_suffix):
                monitored_apps.add(
                    os.path.basename(rrd_path)[:-len(rrd_suffix)])

        sys_maj_min = '{}:{}'.format(*fs_linux.maj_min_from_path(approot))
        _LOGGER.info('Device sys maj:min = %s for approot: %s',
                     sys_maj_min, approot)

        _LOGGER.info('Loading rrd client')
        rrd_loader = RRDClientLoader()
        elapsed = 0
        while True:
            # Sleep only for what is left of the period after the last cycle.
            if step > elapsed:
                time.sleep(step - elapsed)

            cycle_start = time.time()
            data = restclient.get(remote, '/cgroup/_bulk', auth=None).json()

            core_count = _update_core_rrds(
                data['treadmill'], core_metrics_dir, rrd_loader.client,
                step, sys_maj_min)
            service_count = _update_service_rrds(
                data['core'], core_metrics_dir, rrd_loader.client,
                step, sys_maj_min)
            app_data = data['app']
            app_count = _update_app_rrds(
                app_data, app_metrics_dir, rrd_loader.client,
                step, tm_env)

            count = 0 + core_count + service_count + app_count

            # Finish the metrics of apps that are not present anymore.
            seen_apps = set(app_data.keys())
            for gone_app in monitored_apps - seen_apps:
                rrdfile = os.path.join(
                    app_metrics_dir, '{app}.rrd'.format(app=gone_app))
                _LOGGER.info('removing %r', rrdfile)
                rrd.finish(rrd_loader.client, rrdfile)

            monitored_apps = seen_apps

            elapsed = time.time() - cycle_start
            _LOGGER.info('Got %d cgroups metrics in %.3f seconds',
                         count, elapsed)

        # Graceful shutdown. NOTE: the loop above has no break, so this line
        # is not reached as the code stands.
        _LOGGER.info('service shutdown.')