Пример #1
0
    def _initialize(self) -> Optional[int]:
        """Check privileges, RDT availability and prepare internal state.

        Resolves ``self._rdt_enabled`` (``None`` means: use whatever
        ``resctrl.check_resctrl()`` reports), runs ``self._initialize_rdt()``
        when RDT is enabled, narrows ``self._event_names`` to the detected CPU
        codename and finally creates ``self._containers_manager``.

        Returns:
            None on success, or error code 1 that should stop Runner.
        """
        if not security.are_privileges_sufficient():
            # Adjacent string literals are joined verbatim, so every fragment
            # except the last one has to end with a space.
            log.error(
                "Insufficient privileges! "
                "Impossible to use perf_event_open/resctrl subsystems. "
                "For unprivileged user it is needed to: "
                "adjust /proc/sys/kernel/perf_event_paranoid (set to -1), "
                "has CAP_DAC_OVERRIDE and CAP_SETUID capabilities and "
                "SECBIT_NO_SETUID_FIXUP secure bit set.")
            return 1

        # Initialization (auto discovery Intel RDT features).

        rdt_available = resctrl.check_resctrl()
        if self._rdt_enabled is None:
            self._rdt_enabled = rdt_available
            log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
        elif self._rdt_enabled is True and not rdt_available:
            log.error('RDT explicitly enabled but not available - exiting!')
            return 1

        if self._rdt_enabled:
            # Resctrl is enabled and available, call a placeholder to allow further initialization.
            rdt_initialization_ok = self._initialize_rdt()
            if not rdt_initialization_ok:
                return 1

        # Postpone the container manager initialization after rdt checks were performed.
        # NOTE(review): the topology values are not used below; the call is kept
        # unchanged in case callers/tests rely on it - confirm whether it can go.
        platform_cpus, _, platform_sockets = platforms.collect_topology_information()

        platform, _, _ = platforms.collect_platform_information(
            self._rdt_enabled)
        rdt_information = platform.rdt_information

        self._event_names = _filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        # We currently do not support RDT without monitoring.
        if self._rdt_enabled and not rdt_information.is_monitoring_enabled():
            log.error('RDT monitoring is required - please enable CAT '
                      'or MBM with kernel parameters!')
            return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
        )
        return None
Пример #2
0
def test_sync_containers_state(_, get_pids_mock, sync_mock, perf_counters_mock,
                               add_pids_mock, clean_taskless_groups_mock,
                               subcgroups,
                               tasks_, pre_running_containers_,
                               mon_groups_relation, expected_running_containers_,
                               labels_relation_, pre_running_labels_relation_):
    """Tests both Container and ContainerSet classes.

    Note: the input arguments tasks_, pre_running_containers_ and
    expected_running_containers_ end with an underscore to distinguish them from
    the objects created inside the function body and to emphasize the relationship:
    the input arguments are used to create the real objects. Already created objects
    cannot be passed in, because creating them requires another argument, subcgroups,
    which is parametrized separately.

    Note: three variable names share the same postfix:
    * pre_running_containers - state of ContainerManager before (pre) the call to
      sync_containers_state,
    * expected_running_containers - the state expected after the call,
    * got_running_containers - the state actually returned by the call.
    All three are of the same type Dict[Task, ContainerInterface].
    """

    def build_containers(names_relation, labels_relation):
        # Turn a {task name: container name} relation into real Task -> Container objects,
        # passing subcgroups so that both Container and ContainerSet get exercised.
        return {
            task(task_name, subcgroups_paths=subcgroups,
                 labels=labels_relation.get(task_name)): container(container_name, subcgroups)
            for task_name, container_name in names_relation.items()
        }

    # Build the real input objects from the plain parametrized arguments.
    tasks = [task(task_name, subcgroups_paths=subcgroups, labels=labels_relation_.get(task_name))
             for task_name in tasks_]
    pre_running_containers = build_containers(pre_running_containers_,
                                              pre_running_labels_relation_)
    expected_running_containers = build_containers(expected_running_containers_,
                                                   labels_relation_)

    platform_mock = Mock(
        spec=Platform,
        sockets=1,
        cores=1,
        cpus=1,
        rdt_information=RDTInformation(True, True, True, True, 'fff', '1', 0, 0, 0))

    containers_manager = ContainerManager(platform=platform_mock,
                                          allocation_configuration=AllocationConfiguration(),
                                          event_names=[],
                                          )
    # Seed the manager with a copy of the pre-existing containers.
    containers_manager.containers = dict(pre_running_containers)

    # Call sync_containers_state.
    with patch('wca.resctrl.read_mon_groups_relation', return_value=mon_groups_relation):
        got_running_containers = containers_manager.sync_containers_state(tasks)

    # -----------------------
    # Both dictionaries must have exactly the same set of task keys ...
    assert len(got_running_containers) == len(expected_running_containers)
    assert all(expected_task in got_running_containers
               for expected_task in expected_running_containers)
    # ... and equal containers under every key.
    for expected_task, expected_container in expected_running_containers.items():
        assert_equal_containers(expected_container, got_running_containers[expected_task])

    # Check that container objects have the proper resgroup assigned.
    resgroup_name_by_container = {
        c.get_name(): c.get_resgroup().name for c in got_running_containers.values()}
    for expected_resgroup_name, container_names in mon_groups_relation.items():
        for container_name in container_names:
            if container_name not in resgroup_name_by_container:
                continue
            assert resgroup_name_by_container[container_name] == expected_resgroup_name
class MeasurementRunner(Runner):
    """MeasurementRunner run iterations to collect platform, resource, task measurements
    and store them in metrics_storage component.

    Arguments:
        node: component used for tasks discovery
        metrics_storage: storage to store platform, internal, resource and task metrics
            (defaults to DEFAULT_STORAGE/LogStorage to output for standard error)
        action_delay: iteration duration in seconds (None disables wait and iterations)
            (defaults to 1 second)
        rdt_enabled: enables or disables support for RDT monitoring
            (defaults to None(auto) based on platform capabilities)
        extra_labels: additional labels attached to every metrics
            (defaults to empty dict)
        event_names: perf counters to monitor
            (defaults to instructions, cycles, cache-misses, memstalls)
        enable_derived_metrics: enable derived metrics ips, ipc and cache_hit_ratio
            (based on enabled_event names), default to False
    """

    def __init__(
            self,
            node: nodes.Node,
            metrics_storage: storage.Storage = DEFAULT_STORAGE,
            action_delay: Numeric(0, 60) = 1.,  # [s]
            rdt_enabled: Optional[bool] = None,  # Defaults(None) - auto configuration.
            extra_labels: Dict[Str, Str] = None,
            event_names: List[str] = None,
            enable_derived_metrics: bool = False,
            _allocation_configuration: Optional[AllocationConfiguration] = None,
    ):
        """Store the configuration on the instance (see the class docstring for arguments).

        A falsy ``extra_labels`` becomes an empty dict and a falsy ``event_names``
        falls back to ``DEFAULT_EVENTS``.
        """
        # Collaborators.
        self._node = node
        self._metrics_storage = metrics_storage

        # Loop control.
        self._action_delay = action_delay
        self._finish = False  # Guard to stop iterations.
        self._last_iteration = time.time()  # Used internally by wait function.

        # RDT related settings; both "required" flags are disabled by default
        # and are meant to be overridden by subclasses.
        self._rdt_enabled = rdt_enabled
        self._rdt_mb_control_required = False
        self._rdt_cache_control_required = False

        # Metrics related settings.
        self._extra_labels = extra_labels if extra_labels else {}
        self._event_names = event_names if event_names else DEFAULT_EVENTS
        self._enable_derived_metrics = enable_derived_metrics
        self._allocation_configuration = _allocation_configuration

    @profiler.profile_duration(name='sleep')
    def _wait(self):
        """Sleep for whatever is left of the current iteration's time budget.

        The budget is ``self._action_delay`` seconds counted from
        ``self._last_iteration``; if the iteration already took longer, the sleep
        time is zero. Afterwards ``self._last_iteration`` is set to the current time.
        """
        elapsed = time.time() - self._last_iteration
        remaining = self._action_delay - elapsed
        time.sleep(remaining if remaining > 0. else 0.)
        self._last_iteration = time.time()

    def _initialize(self) -> Optional[int]:
        """Check privileges, RDT availability and prepare internal state.

        Resolves ``self._rdt_enabled`` (``None`` means: use whatever
        ``resctrl.check_resctrl()`` reports), runs ``self._initialize_rdt()``
        when RDT is enabled and finally creates ``self._containers_manager``
        from the collected topology and RDT information.

        Returns:
            None on success, or error code 1 that should stop Runner.
        """
        if not security.are_privileges_sufficient(self._rdt_enabled):
            # Adjacent string literals are joined verbatim, so every fragment
            # except the last one has to end with a space.
            log.error("Impossible to use perf_event_open/resctrl subsystems. "
                      "You need to: adjust /proc/sys/kernel/perf_event_paranoid (set to -1); "
                      "or has CAP_DAC_OVERRIDE and CAP_SETUID capabilities set. "
                      "You can run process as root too.")
            return 1

        # Initialization (auto discovery Intel RDT features).

        rdt_available = resctrl.check_resctrl()
        if self._rdt_enabled is None:
            self._rdt_enabled = rdt_available
            log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
        elif self._rdt_enabled is True and not rdt_available:
            log.error('RDT explicitly enabled but not available - exiting!')
            return 1

        if self._rdt_enabled:
            # Resctrl is enabled and available, call a placeholder to allow further initialization.
            rdt_initialization_ok = self._initialize_rdt()
            if not rdt_initialization_ok:
                return 1

        # Postpone the container manager initialization after rdt checks were performed.
        platform_cpus, _, platform_sockets = platforms.collect_topology_information()

        platform, _, _ = platforms.collect_platform_information(self._rdt_enabled)
        rdt_information = platform.rdt_information

        # We currently do not support RDT without monitoring.
        if self._rdt_enabled and not rdt_information.is_monitoring_enabled():
            log.error('RDT monitoring is required - please enable CAT '
                      'or MBM with kernel parameters!')
            return 1

        self._containers_manager = ContainerManager(
            rdt_information=rdt_information,
            platform_cpus=platform_cpus,
            platform_sockets=platform_sockets,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
        )
        return None

    def _iterate(self):
        """Run one iteration: discover tasks, measure, call the body hook, wait, send metrics.

        Task labels are rewritten in place with their keys passed through
        ``sanitize_label``. The iteration duration registered with the profiler
        includes the time spent in ``_wait``.
        """
        started_at = time.time()

        # Discover tasks and normalize their label keys.
        tasks = self._node.get_tasks()
        log.debug('Tasks detected: %d', len(tasks))
        for task in tasks:
            task.labels = {sanitize_label(key): value for key, value in task.labels.items()}

        # Keep sync of found tasks and internally managed containers.
        containers = self._containers_manager.sync_containers_state(tasks)

        # Platform level data; platform labels are extended (and possibly
        # overridden) by the user supplied extra labels.
        platform, platform_metrics, platform_labels = \
            platforms.collect_platform_information(self._rdt_enabled)
        common_labels = dict(platform_labels, **self._extra_labels)

        # Task level data.
        tasks_measurements, tasks_resources, tasks_labels = _prepare_tasks_data(containers)
        tasks_metrics = _build_tasks_metrics(tasks_labels, tasks_measurements)

        # Hook for subclasses.
        self._iterate_body(containers, platform, tasks_measurements, tasks_resources,
                           tasks_labels, common_labels)

        self._wait()

        profiling.profiler.register_duration('iteration', time.time() - started_at)

        # Generic metrics.
        package = MetricPackage(self._metrics_storage)
        package.add_metrics(_get_internal_metrics(tasks))
        package.add_metrics(platform_metrics)
        package.add_metrics(tasks_metrics)
        package.add_metrics(profiling.profiler.get_metrics())
        package.add_metrics(get_logging_metrics())
        package.send(common_labels)

    def run(self) -> int:
        """Initialize the runner and iterate until ``self._finish`` is set.

        Returns the error code from ``_initialize`` if it is not None (no
        iteration is run in that case); otherwise runs at least one iteration,
        cleans up the containers manager and returns 0.
        """
        status = self._initialize()
        if status is not None:
            return status

        # At least one iteration is always executed (do-while semantics).
        self._iterate()
        while not self._finish:
            self._iterate()

        # Cleanup phase.
        self._containers_manager.cleanup()
        return 0

    def _iterate_body(self, containers, platform, tasks_measurements, tasks_resources,
                      tasks_labels, common_labels):
        """No-op implementation of the inner loop body - called by ``_iterate``.

        ``_iterate`` calls it once per iteration, after the tasks data was
        prepared and before ``_wait``; subclasses may override it.
        """

    def _initialize_rdt(self) -> bool:
        """Nothing to configure in RDT to measure resource usage.

        Called by ``_initialize`` only when RDT is enabled; a falsy result
        makes ``_initialize`` return error code 1.

        Returns state of rdt initialization (True ok, False for error) -
        this implementation always returns True.
        """
        return True
class MeasurementRunner(Runner):
    """rst

    MeasurementRunner run iterations to collect platform, resource, task measurements
    and store them in metrics_storage component.

    - `node`: **type**:

        Component used for tasks discovery.

    - ``metrics_storage``: **type** = `DEFAULT_STORAGE`

        Storage to store platform, internal, resource and task metrics.
        (defaults to DEFAULT_STORAGE/LogStorage to output for standard error)

    - ``interval``: **Numeric(0,60)** = *1.*

        Iteration duration in seconds (None disables wait and iterations).
        (defaults to 1 second)

    - ``rdt_enabled``: **Optional[bool]** = *None*

        Enables or disables support for RDT monitoring.
        (defaults to None(auto) based on platform capabilities)

    - ``gather_hw_mm_topology``: **bool** = *False*

        Gather hardware/memory topology based on lshw and ipmctl.
        (defaults to False)

    - ``extra_labels``: **Optional[Dict[Str, Str]]** = *None*

        Additional labels attached to every metrics.
        (defaults to empty dict)

    - ``event_names``: **List[str]** = `[]`

        Perf counters to monitor.
        (defaults to not collect perf counters - empty list of events)

    - ``perf_aggregate_cpus``: **bool** = `True`

        Should perf events collected for cgroups be aggregated (sum) by CPUs.
        (defaults to true, to limit number of exposed metrics)

    - ``enable_derived_metrics``: **bool** = *False*

        Enable derived metrics ips, ipc and cache_hit_ratio.
        (based on enabled_event names, default to False)

    - ``enable_perf_uncore``: **Optional[bool]** = *None*

        Enable perf event uncore metrics.
        (defaults to None - automatic, if available enable)

    - ``task_label_generators``: **Optional[Dict[str, TaskLabelGenerator]]** = *None*

        Component to generate additional labels for tasks.
        (optional)

    - ``allocation_configuration``: **Optional[AllocationConfiguration]** = *None*

        Allows fine grained control over allocations.
        (defaults to AllocationConfiguration() instance)

    - ``wss_reset_interval``: **int** = *0*

        Interval of resetting wss.
        (defaults to 0, not measured)

    - ``include_optional_labels``: **bool** = *False*

        Include optional labels like: sockets, cpus, cpu_model
        (defaults to False)
    """

    def __init__(
            self,
            node: Node,
            metrics_storage: Storage = DEFAULT_STORAGE,
            interval: Numeric(0, 60) = 1.,
            rdt_enabled: Optional[bool] = None,
            gather_hw_mm_topology: bool = False,
            extra_labels: Optional[Dict[Str, Str]] = None,
            event_names: List[str] = [],
            perf_aggregate_cpus: bool = True,
            enable_derived_metrics: bool = False,
            enable_perf_uncore: Optional[bool] = None,
            task_label_generators: Optional[Dict[str, TaskLabelGenerator]] = None,
            allocation_configuration: Optional[AllocationConfiguration] = None,
            wss_reset_interval: int = 0,
            include_optional_labels: bool = False
    ):
        """Store the configuration and register metadata for dynamic perf events.

        Parameters are described in the class docstring. Besides setting
        attributes, the constructor mutates the module-level ``METRICS_METADATA``:
        it adds an entry for every event name containing ``'__r'`` and, when
        ``perf_aggregate_cpus`` is false, sets ``levels`` to ``['cpu']`` on every
        metric whose source is ``PERF_SUBSYSTEM_WITH_CGROUPS``.
        """

        self._node = node
        self._metrics_storage = metrics_storage
        self._interval = interval
        self._rdt_enabled = rdt_enabled
        self._gather_hw_mm_topology = gather_hw_mm_topology
        self._include_optional_labels = include_optional_labels

        # Label values are always stored as strings.
        self._extra_labels = {k: str(v) for k, v in
                              extra_labels.items()} if extra_labels else dict()
        log.debug('Extra labels: %r', self._extra_labels)
        self._finish = False  # Guard to stop iterations.
        self._last_iteration = time.time()  # Used internally by wait function.
        self._allocation_configuration = allocation_configuration
        # Keep a private copy: the default ``[]`` is a single list object shared
        # by every call, and a caller-provided list must not be aliased either.
        self._event_names = list(event_names)
        log.info('Enabling %i perf events: %s', len(self._event_names),
                 ', '.join(self._event_names))
        self._perf_aggregate_cpus = perf_aggregate_cpus

        # TODO: fix those workarounds for dynamic levels and dynamic perf event metrics.
        # First add dynamic metrics
        for event_name in self._event_names:
            # is dynamic raw event
            if '__r' in event_name:
                log.debug('Creating metadata for dynamic metric: %r', event_name)
                METRICS_METADATA[event_name] = MetricMetadata(
                    'Hardware PMU counter (raw event)',
                    MetricType.COUNTER,
                    MetricUnit.NUMERIC,
                    MetricSource.PERF_SUBSYSTEM_WITH_CGROUPS,
                    MetricGranularity.TASK,
                    [],
                    'no (event_names)',
                )
        # Then set the proper levels for all perf-with-cgroups metrics,
        # based on the perf_aggregate_cpus value.
        if not perf_aggregate_cpus:
            log.debug('Enabling "cpu" level for PERF_SUBSYSTEM_WITH_CGROUPS metrics.')
            for metric_metadata in METRICS_METADATA.values():
                if metric_metadata.source == MetricSource.PERF_SUBSYSTEM_WITH_CGROUPS:
                    metric_metadata.levels = ['cpu']

        self._enable_derived_metrics = enable_derived_metrics
        self._enable_perf_uncore = enable_perf_uncore

        # Default value for task_labels_generator.
        if task_label_generators is None:
            self._task_label_generators = {
                'application':
                    TaskLabelRegexGenerator('$', '', 'task_name'),
                'application_version_name':
                    TaskLabelRegexGenerator('.*$', '', 'task_name'),
            }
        else:
            self._task_label_generators = task_label_generators

        self._wss_reset_interval = wss_reset_interval

        # Set up later by _init_uncore_pmu.
        self._uncore_pmu = None

        # Optional hooks that let other runners reuse this class.
        self._initialize_rdt_callback = None
        self._iterate_body_callback = None

    def _set_initialize_rdt_callback(self, func):
        """Register ``func`` as the RDT initialization hook.

        ``_initialize`` calls it without arguments when RDT is enabled and
        returns error code 1 if the result is falsy.
        """
        self._initialize_rdt_callback = func

    def _set_iterate_body_callback(self, func):
        """Register ``func`` as the per-iteration hook.

        ``_iterate`` calls it as ``func(containers, platform, tasks_data, common_labels)``
        before waiting for the end of the iteration.
        """
        self._iterate_body_callback = func

    @profiler.profile_duration(name='sleep')
    def _wait(self):
        """Sleep for whatever is left of the current iteration's time budget.

        The budget is ``self._interval`` seconds counted from
        ``self._last_iteration``; if the iteration already took longer, the sleep
        time is zero. Afterwards ``self._last_iteration`` is set to the current time.
        """
        elapsed = time.time() - self._last_iteration
        remaining = self._interval - elapsed
        time.sleep(remaining if remaining > 0. else 0.)
        self._last_iteration = time.time()

    def _initialize(self) -> Optional[int]:
        """Check RDT availability, privileges and prepare internal state.

        Order of work: resolve ``self._rdt_enabled`` (None means auto-detect),
        verify privileges, run the optional RDT initialization callback, collect
        platform information, filter event names for the detected CPU, create the
        containers manager and set up uncore PMU counters.

        Returns None on success or error code 1 that should stop Runner.
        """
        # Auto discovery of Intel RDT features.
        resctrl_present = resctrl.check_resctrl()
        if self._rdt_enabled is None:
            self._rdt_enabled = resctrl_present
            log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
        elif self._rdt_enabled is True and not resctrl_present:
            log.error('RDT explicitly enabled but not available - exiting!')
            return 1

        privileges_ok = security.are_privileges_sufficient(
            # _allocation_configuration is set in allocation mode (AllocationRunner),
            # so write access to cgroups is needed then.
            self._allocation_configuration is not None,
            # resctrl is used only when RDT is enabled.
            self._rdt_enabled,
            # perf is used only when there is at least one event to collect.
            len(self._event_names) > 0,
        )
        if not privileges_ok:
            return 1

        # MeasurementRunner itself has nothing to configure in RDT; the callback
        # exists for other runners that reuse this class and need extra setup.
        if self._rdt_enabled and self._initialize_rdt_callback is not None:
            if not self._initialize_rdt_callback():
                return 1

        log.debug('rdt_enabled: %s', self._rdt_enabled)
        log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
        platform, _, _ = platforms.collect_platform_information(
            self._rdt_enabled,
            gather_hw_mm_topology=self._gather_hw_mm_topology
        )

        self._event_names = _filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        # We currently do not support RDT without monitoring.
        if self._rdt_enabled and not platform.rdt_information.is_monitoring_enabled():
            log.error('RDT monitoring is required - please enable CAT '
                      'or MBM with kernel parameters!')
            return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
            wss_reset_interval=self._wss_reset_interval,
            perf_aggregate_cpus=self._perf_aggregate_cpus
        )

        self._init_uncore_pmu(self._enable_derived_metrics, self._enable_perf_uncore, platform)

        return None

    def _init_uncore_pmu(self, enable_derived_metrics, enable_perf_uncore,
                         platform: platforms.Platform):
        """Set up ``self._uncore_pmu`` and ``self._uncore_get_measurements``.

        ``enable_perf_uncore`` is tri-state: True - uncore counters are required
        and ``PMUNotAvailable`` is re-raised when they cannot be discovered;
        None - use them if available, otherwise log a warning and go on without
        them; anything else - do not use them. When they are not used,
        ``self._uncore_get_measurements`` returns an empty dict.
        """
        # Start from the "no uncore metrics" state; it is replaced only on full success.
        self._uncore_pmu = None
        self._uncore_get_measurements = lambda: {}

        if enable_perf_uncore not in (True, None):
            return

        must_be_available = enable_perf_uncore is True
        try:
            # Cpus and events for perf uncore imc.
            cpus_imc, pmu_events_imc = _discover_pmu_uncore_config(
                UNCORE_IMC_EVENTS, 'uncore_imc_')
            pmu_events = dict(pmu_events_imc)
            # Cpus and events for perf uncore upi.
            cpus_upi, pmu_events_upi = _discover_pmu_uncore_config(
                UNCORE_UPI_EVENTS, 'uncore_upi_')
            pmu_events.update(pmu_events_upi)

            cpus = list(set(cpus_imc + cpus_upi))
        except PMUNotAvailable as e:
            if must_be_available:
                raise
            log.warning('Perf pmu metrics requested, but not available. '
                        'Not collecting perf pmu metrics! '
                        'error={}'.format(e))
            return

        # Prepare uncore object.
        self._uncore_pmu = UncorePerfCounters(
            cpus=cpus,
            pmu_events=pmu_events,
            platform=platform,
        )

        if not enable_derived_metrics:
            self._uncore_get_measurements = self._uncore_pmu.get_measurements
            return

        # Wrap the raw measurements with the derived metrics generator.
        self._uncore_derived_metrics = UncoreDerivedMetricsGenerator(
            self._uncore_pmu.get_measurements)
        self._uncore_get_measurements = self._uncore_derived_metrics.get_measurements

    def _iterate(self):
        """Run one iteration: discover tasks, measure, call the body callback, wait, send metrics.

        If the node cannot provide tasks (``TaskSynchronizationException``) or
        the tasks data cannot be prepared (``MissingMeasurementException``), the
        error is logged, ``_wait`` is called and the iteration ends without
        sending any metrics.

        The iteration duration registered with the profiler includes the time
        spent in ``_wait``.
        """
        iteration_start = time.time()

        # Get information about tasks.
        try:
            tasks = self._node.get_tasks()
        except TaskSynchronizationException as e:
            log.error('Cannot synchronize tasks with node (error=%s) - skip this iteration!', e)
            self._wait()
            return

        append_additional_labels_to_tasks(self._task_label_generators, tasks)
        log.debug('Tasks detected: %d', len(tasks))

        # Keep sync of found tasks and internally managed containers.
        containers = self._containers_manager.sync_containers_state(tasks)
        log.log(TRACE, 'Tasks container mapping:\n%s', '\n'.join(
            ['%s(%s)  =  %s' % (task.name, task.task_id, container._cgroup_path) for task, container
             in containers.items()]))

        # @TODO why not in platform module?
        extra_platform_measurements = self._uncore_get_measurements()

        # Platform information
        platform, platform_metrics, platform_labels = platforms.collect_platform_information(
            self._rdt_enabled, self._gather_hw_mm_topology,
            extra_platform_measurements=extra_platform_measurements,
            # Honor the constructor option; it used to be hard-coded to False,
            # which made ``include_optional_labels`` have no effect.
            include_optional_labels=self._include_optional_labels,
        )

        # Common labels: platform labels extended (and possibly overridden) by extra labels.
        common_labels = dict(platform_labels, **self._extra_labels)

        try:
            tasks_data = _prepare_tasks_data(containers)
        except MissingMeasurementException as e:
            log.error('Cannot synchronize tasks measurements (error=%s) - skip this iteration!', e)
            self._wait()
            return

        # Inject other runners code.
        if self._iterate_body_callback is not None:
            self._iterate_body_callback(containers, platform, tasks_data, common_labels)

        self._wait()

        iteration_duration = time.time() - iteration_start
        profiling.profiler.register_duration('iteration', iteration_duration)

        # Generic metrics.
        metrics_package = MetricPackage(self._metrics_storage)
        metrics_package.add_metrics(_get_internal_metrics(tasks))
        metrics_package.add_metrics(platform_metrics)
        metrics_package.add_metrics(_build_tasks_metrics(tasks_data))
        metrics_package.add_metrics(profiling.profiler.get_metrics())
        metrics_package.add_metrics(get_logging_metrics())
        metrics_package.send(common_labels)

    def run(self) -> int:
        """Initialize the runner and iterate until ``self._finish`` is set.

        Returns the error code from ``_initialize`` if it is not None (no
        iteration is run in that case); otherwise runs at least one iteration,
        cleans up the containers manager and returns 0.
        """
        status = self._initialize()
        if status is not None:
            return status

        # At least one iteration is always executed (do-while semantics).
        self._iterate()
        while not self._finish:
            self._iterate()

        # Cleanup phase.
        self._containers_manager.cleanup()
        return 0
    def _initialize(self) -> Optional[int]:
        """Check RDT availability, privileges and prepare internal state.

        Order of work: resolve ``self._rdt_enabled`` (None means auto-detect),
        verify privileges, run the optional RDT initialization callback, collect
        platform information, filter event names for the detected CPU, create the
        containers manager and set up uncore PMU counters.

        Returns None on success or error code 1 that should stop Runner.
        """
        # Auto discovery of Intel RDT features.
        resctrl_present = resctrl.check_resctrl()
        if self._rdt_enabled is None:
            self._rdt_enabled = resctrl_present
            log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
        elif self._rdt_enabled is True and not resctrl_present:
            log.error('RDT explicitly enabled but not available - exiting!')
            return 1

        privileges_ok = security.are_privileges_sufficient(
            # _allocation_configuration is set in allocation mode (AllocationRunner),
            # so write access to cgroups is needed then.
            self._allocation_configuration is not None,
            # resctrl is used only when RDT is enabled.
            self._rdt_enabled,
            # perf is used only when there is at least one event to collect.
            len(self._event_names) > 0,
        )
        if not privileges_ok:
            return 1

        # MeasurementRunner itself has nothing to configure in RDT; the callback
        # exists for other runners that reuse this class and need extra setup.
        if self._rdt_enabled and self._initialize_rdt_callback is not None:
            if not self._initialize_rdt_callback():
                return 1

        log.debug('rdt_enabled: %s', self._rdt_enabled)
        log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
        platform, _, _ = platforms.collect_platform_information(
            self._rdt_enabled,
            gather_hw_mm_topology=self._gather_hw_mm_topology
        )

        self._event_names = _filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        # We currently do not support RDT without monitoring.
        if self._rdt_enabled and not platform.rdt_information.is_monitoring_enabled():
            log.error('RDT monitoring is required - please enable CAT '
                      'or MBM with kernel parameters!')
            return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
            wss_reset_interval=self._wss_reset_interval,
            perf_aggregate_cpus=self._perf_aggregate_cpus
        )

        self._init_uncore_pmu(self._enable_derived_metrics, self._enable_perf_uncore, platform)

        return None
Пример #6
0
class MeasurementRunner(Runner):
    """rst

    MeasurementRunner run iterations to collect platform, resource, task measurements
    and store them in metrics_storage component.

    - `node`: **type**:

        Component used for tasks discovery.

    - ``metrics_storage``: **type** = `DEFAULT_STORAGE`

        Storage to store platform, internal, resource and task metrics.
        (defaults to DEFAULT_STORAGE/LogStorage to output for standard error)

    - ``interval``: **Numeric(0,60)** = *1.*

        Iteration duration in seconds (None disables wait and iterations).
        (defaults to 1 second)

    - ``rdt_enabled``: **Optional[bool]** = *None*

        Enables or disabled support for RDT monitoring.
        (defaults to None(auto) based on platform capabilities)

    - ``gather_hw_mm_topology``: **Optional[bool]** = *None*

        Gather hardware/memory topology based on dmidecode and ipmctl.
        (defaults to None(auto) based on dmidecode and ipmctl binaries availability)

    - ``extra_labels``: **Optional[Dict[Str, Str]]** = *None*

        Additional labels attached to every metrics.
        (defaults to empty dict)

    - ``event_names``: **List[str]** = `[]`

        Perf counters to monitor.
        (defaults to not collect perf counters - empty list of events)

    - ``perf_aggregate_cpus``: **bool** = `True`

        Should perf events collected for cgroups be aggregated (sum) by CPUs.
        (defaults to true, to limit number of exposed metrics)

    - ``enable_derived_metrics``: **bool** = *False*

        Enable derived metrics ips, ipc and cache_hit_ratio.
        (based on enabled_event names, default to False)

    - ``uncore_event_names``: **List[str]** = `[]`

        Enable perf event uncore metrics.
        (defaults to an empty list - no uncore events are collected)

    - ``task_label_generators``: **Optional[Dict[str, TaskLabelGenerator]]** = *None*

        Component to generate additional labels for tasks.
        (optional)

    - ``allocation_configuration``: **Optional[AllocationConfiguration]** = *None*

        Allows fine grained control over allocations.
        (defaults to AllocationConfiguration() instance)

    - ``wss_reset_cycles``: **Optional[int]** = *None*

        Interval of resetting WSS (WorkingSetSize).
        (defaults to None, which means that metric is not collected at all, e.g. when set to 1
        ``clear_refs`` will be reset every measurement iteration defined by global ``interval``
        option.)
        If set to 0, referenced bytes will be collected but will not be reset in cycling manner.

    - ``wss_stable_cycles``: **int** = *0*

        Number of stable cycles after which "referenced bytes rate" is considered stable.
        Optionally, if positive and wss_reset_cycles is 0, then after the stabilization period
        "referenced bytes" will be reset.

        Its behavior depends on wss_reset_cycles:
        - completely ignored if wss_reset_cycles is None (referenced bytes and WSS is disabled).
        - if "wss_reset_cycles" is set to special value "0" and "wss_stable_cycles" is positive then
          after achieving stability "referenced bytes" will be reset (to restart cycle).

        Can be specified as a negative number, which means that the stability check is enabled
        but after stabilization the "referenced bytes" will not be reset
        (rely on wss_reset_cycles to be positive and reset).

        Expressed in number of WCA measurements intervals (cycles).
        E.g. if global interval is set to 15s and wss_stable_cycles is set to 40 cycles,
        the "stability condition" is met in consecutive 40 cycles (about 600s = 10 minutes).


    - ``wss_membw_threshold``: **Optional[float]** = *None*

        Value used to calculate threshold based on fraction of memory bandwidth (transferred bytes)
        to treat referenced value as stable and return WSS.
        Memory bandwidth multiplied by this value.  None means condition is ignored and
        task_working_set_size_bytes metric will not be collected.

        E.g. 0.1 means membw * 0.1 = which equals to 10% of memory bandwidth.

    - ``include_optional_labels``: **bool** = *False*

        Attach following labels to all metrics:
        `sockets`, `cores`, `cpus`, `cpu_model`, `cpu_model_number` and `wca_version`

    - ``zoneinfo``: **Union[Str, bool]** = *True*

        By default when zoneinfo is enabled, all the metrics matching to '{name} {value}'
        will be collected.  False means disable the collection.

        If string is provided it will be used as regexp to extract information from /proc/zoneinfo
        (only matching regexp will be collected). Regexp should contains two groups. When zoneinfo
        is True default value for this regexp can parse values like "nr_pages 1234".

    - ``vmstat``: **Union[Str, bool]** = *True*

        By default when vmstat is enabled, all the metrics matching to '{name} {value}'
        will be collected.  False means disable the collection.

        If string is provided it will be used as regexp to match key.

    - ``sched``: **Union[Str, bool]** = *False*

        Responsible for collecting data from /proc/PID/sched metric:
        - task_sched_stat (lines with ':'),
        - task_sched_stat_numa_faults (numa_faults field).
        When sched is enabled, all metrics (lines from /proc/PID/sched containing ':')
        will be collected.  False (the default) means disable the collection.

        If string is provided it will be used as regexp to match key (string before ':').
    """

    def __init__(
            self,
            node: Node,
            metrics_storage: Storage = DEFAULT_STORAGE,
            interval: Numeric(0, 60) = 1.,
            rdt_enabled: Optional[bool] = None,
            gather_hw_mm_topology: Optional[bool] = None,
            extra_labels: Optional[Dict[Str, Str]] = None,
            event_names: List[str] = [],
            perf_aggregate_cpus: bool = True,
            enable_derived_metrics: bool = False,
            uncore_event_names: List[Union[List[str], str]] = [],
            task_label_generators: Optional[Dict[str, TaskLabelGenerator]] = None,
            allocation_configuration: Optional[AllocationConfiguration] = None,
            wss_reset_cycles: Optional[int] = None,
            wss_stable_cycles: int = 0,
            wss_membw_threshold: Optional[float] = None,
            include_optional_labels: bool = False,
            zoneinfo: Union[Str, bool] = True,
            vmstat: Union[Str, bool] = True,
            sched: Union[Str, bool] = False,
    ):
        """Store the configuration, register dynamic metrics and validate regexp options.

        Besides keeping the arguments on the instance, the constructor:

        - registers metadata in ``METRICS_METADATA`` for every event name containing ``__r``
          (dynamic raw perf events),
        - switches perf related metrics to the ``cpu`` level when ``perf_aggregate_cpus``
          is disabled,
        - compiles the ``zoneinfo``, ``vmstat`` and ``sched`` regexps when they are given
          as strings.

        Raises:
            ValidationError: when one of the regexps does not compile or when the zoneinfo
                regexp does not define exactly two groups.
        """

        def _flag_or_compiled(option, error_format):
            # Booleans are stored untouched, anything else is treated as a regexp.
            if option in (True, False):
                return option
            try:
                return re.compile(option)
            except re.error as e:
                raise ValidationError(error_format % e)

        self._node = node
        self._metrics_storage = metrics_storage
        self._interval = interval
        self._rdt_enabled = rdt_enabled
        self._gather_hw_mm_topology = gather_hw_mm_topology
        self._include_optional_labels = include_optional_labels

        # Label values are always exposed as strings.
        if extra_labels:
            self._extra_labels = {label: str(value) for label, value in extra_labels.items()}
        else:
            self._extra_labels = dict()
        log.debug('Extra labels: %r', self._extra_labels)

        self._finish = False  # Guard to stop iterations.
        self._last_iteration = time.time()  # Used internally by wait function.
        self._allocation_configuration = allocation_configuration
        self._event_names = event_names
        self._perf_aggregate_cpus = perf_aggregate_cpus

        # TODO: fix those workarounds for dynamic levels and dynamic perf event metrics.
        # Dynamic raw events have no static metadata, so it is created on the fly.
        for event_name in event_names:
            if '__r' not in event_name:
                continue
            log.debug('Creating metadata for dynamic metric: %r', event_name)
            METRICS_METADATA[event_name] = MetricMetadata(
                'Hardware PMU counter (raw event)',
                MetricType.COUNTER,
                MetricUnit.NUMERIC,
                MetricSource.PERF_SUBSYSTEM_WITH_CGROUPS,
                MetricGranularity.TASK,
                [],
                'no (event_names)',
            )

        # Without aggregation perf based metrics are reported per cpu.
        if not perf_aggregate_cpus:
            log.debug('Enabling "cpu" level for PERF_SUBSYSTEM_WITH_CGROUPS and derived metrics.')
            per_cpu_sources = (MetricSource.PERF_SUBSYSTEM_WITH_CGROUPS,
                               MetricSource.DERIVED_PERF_WITH_CGROUPS)
            for metadata in METRICS_METADATA.values():
                if metadata.source in per_cpu_sources:
                    metadata.levels = ['cpu']

        self._enable_derived_metrics = enable_derived_metrics
        self._uncore_events = uncore_event_names

        self._task_label_generators = task_label_generators or {}

        self._wss_reset_cycles = wss_reset_cycles
        self._wss_stable_cycles = wss_stable_cycles
        self._wss_membw_threshold = wss_membw_threshold

        self._uncore_pmu = None

        self._initialize_rdt_callback = None
        self._iterate_body_callback = None
        self._cached_bandwidth = None

        # zoneinfo: True -> default regexp, False -> disabled, string -> custom regexp.
        if zoneinfo is True:
            self._zoneinfo = zoneinfo
            zoneinfo_pattern = zoneinfo_module.DEFAULT_REGEXP
            log.debug('Enabled zoneinfo collection')
        elif zoneinfo is False:
            self._zoneinfo = zoneinfo
            log.debug('Disabled zoneinfo collection')
            zoneinfo_pattern = None
        else:
            zoneinfo_pattern, self._zoneinfo = zoneinfo, True

        # Validate zoneinfo regexp.
        log.debug('zoneinfo=%r regexp=%r', self._zoneinfo, zoneinfo_pattern)
        self._zoneinfo_regexp_compiled = None
        if self._zoneinfo:
            try:
                self._zoneinfo_regexp_compiled = re.compile(zoneinfo_pattern)
            except re.error as e:
                raise ValidationError('zoneinfo_regexp_compile improper regexp: %s' % e)

            if self._zoneinfo_regexp_compiled.groups != 2:
                raise ValidationError(
                    'zoneinfo_regexp_compile improper number of groups: should be 2')

        # Validate config and vmstat/sched regexps.
        self._vmstat = _flag_or_compiled(vmstat, 'vmstat_regexp_compile improper regexp: %s')
        self._sched = _flag_or_compiled(sched, 'sched regex compile improper regexp: %s')

    def _set_initialize_rdt_callback(self, func):
        """Register ``func`` as the RDT initialization hook.

        ``_initialize`` calls the hook without arguments when RDT is enabled and treats
        a falsy result as an initialization failure.
        """
        self._initialize_rdt_callback = func

    def _set_iterate_body_callback(self, func):
        """Register ``func`` as the hook executed inside every iteration.

        ``_iterate`` calls the hook with ``(containers, platform, tasks_data, common_labels)``
        just before waiting for the end of the interval.
        """
        self._iterate_body_callback = func

    @profiler.profile_duration(name='sleep')
    def _wait(self):
        """Sleep for the part of the interval not yet consumed by the current iteration.

        The time that passed since the previous wake-up is subtracted from the configured
        interval; when the iteration already took longer, no sleep time is left.
        Afterwards the wake-up timestamp is refreshed for the next iteration.
        """
        elapsed = time.time() - self._last_iteration
        remaining = self._interval - elapsed

        # Never ask for a negative sleep when the iteration overran the interval.
        time.sleep(remaining if remaining > 0. else 0.)
        self._last_iteration = time.time()

    def _initialize(self) -> Optional[int]:
        """Check RDT availability, privileges and prepare internal state.

        Flow:
        - conclude requirements based on configuration,
        - conclude required features based on auto discovery,
        - confront user expectations from configuration file with resctrl fs and
          security access,
        - check RDT HW monitoring features availability,
        - create the containers manager and the uncore PMU readers.

        Returns:
            ``None`` when the runner is ready to iterate, or the error code ``1`` that
            should stop the runner.
        """
        resctrl_available = resctrl.check_resctrl()
        # If enabled explicitly check resctrl availability right now.
        if self._rdt_enabled is True and not resctrl_available:
            log.error('RDT explicitly enabled but resctrl fs not available - exiting!')
            return 1

        # Auto discovery Intel RDT features.
        rdt_auto_enabling = self._rdt_enabled is None
        if rdt_auto_enabling:
            # Assume yes temporary - but will check monitoring/access later.
            log.debug('Enable RDT auto discovery (resctrl availability=%s)', resctrl_available)
            self._rdt_enabled = resctrl_available

        log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
        platform, _, _ = platforms.collect_platform_information(
            resctrl_available,
            gather_hw_mm_topology=self._gather_hw_mm_topology
        )

        # Confront RDT (resctrl fs) with HW enabled monitoring features.
        if self._rdt_enabled and not platform.rdt_information.is_monitoring_enabled():
            # Note: WCA does not support RDT without monitoring (keeps a mapping of
            # cgroups and resctrl groups).
            # The sentences are separated by a space so the message stays readable.
            msg = ('Resctrl is available but RDT monitoring features are not! '
                   'Please enable CMT or MBM with kernel parameters (monitoring is '
                   'required for CAT or MBA allocation)!')
            if not rdt_auto_enabling:
                # RDT was forced by configuration - fail fast here.
                log.error(msg)
                return 1
            log.debug(msg)
            self._rdt_enabled = False
            # Override: rdt information should not be available later
            # (e.g. in ContainerManager).
            platform.rdt_information = None

        # All RDT checks (security/check) done - show info and call initialization callback.
        log.info('RDT: %s %s', 'enabled' if self._rdt_enabled else 'disabled',
                 ' (auto discovery)' if rdt_auto_enabling else '',
                 )

        # Event names (perf cgroups)
        self._event_names = filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        log.info('Enabling %i perf events (for cgroups).', len(self._event_names))
        log.debug('Enabling perf events: %s', ', '.join(self._event_names))
        # Check and assume most popular number of available number of HW counters.
        if self._event_names and not check_perf_event_count_limit(
                self._event_names, platform.cpus, platform.cores):
            return 1

        # _allocation_configuration is set in allocation mode (AllocationRunner)
        # so we need access to write in cgroups.
        write_to_cgroup = self._allocation_configuration is not None
        use_perf = len(self._event_names) > 0
        # Check we have enough access.
        if not security.are_privileges_sufficient(write_to_cgroup, self._rdt_enabled, use_perf):
            return 1

        # Resctrl is enabled and available, call a placeholder to allow further initialization.
        # For "measurement mode" it's nothing to configure in RDT.
        # The callback is only set when "MeasurementRunner" is used as a component
        # of other runners e.g. Allocation.
        if self._rdt_enabled and self._initialize_rdt_callback is not None:
            if not self._initialize_rdt_callback():
                return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
            wss_reset_cycles=self._wss_reset_cycles,
            wss_stable_cycles=self._wss_stable_cycles,
            wss_membw_threshold=self._wss_membw_threshold,
            perf_aggregate_cpus=self._perf_aggregate_cpus,
            interval=self._interval,
            sched=self._sched,
        )
        log.log(TRACE, 'container manager config: %s', self._containers_manager.__dict__)

        self._init_uncore_pmu_events(self._enable_derived_metrics, self._uncore_events, platform)

        return None

    @staticmethod
    def _parse_uncore_event_input(event):
        available_types = ('uncore_imc', 'uncore_cha', 'uncore_upi')
        available_keys = ('event', 'umask', 'config', 'config1')
        event_value = 0
        umask = 0
        config = 0
        config1 = 0

        assert len(event) > 0, 'Uncore event must not be empty!'
        configuration = event.split('/')
        assert len(configuration) >= 3, 'Uncore event info is missing in configuration!'
        if configuration[1] not in available_types:
            raise UncoreEventConfigError('Used wrong PMU type: {}. '
                                         'Please use one of the following: '
                                         '{}'.format(configuration[1], available_types))

        if 'event=' not in configuration[2] and 'config=' not in configuration[2]:
            raise UncoreEventConfigError('Event or config value must be specified!')

        event_name = configuration[0]
        assert len(event_name) > 0, 'Uncore event name must not be empty!'
        event_type = configuration[1]
        for key_and_value in configuration[2].split(','):
            separate = key_and_value.split('=')
            key = separate[0]
            value = separate[1]
            if key == 'event':
                event_value = int(value, 16)
            elif key == 'umask':
                umask = int(value, 16)
            elif key == 'config':
                config = int(value, 16)
            elif key == 'config1':
                config1 = int(value, 16)
            else:
                raise UncoreEventConfigError(
                    'Used wrong configuration! Unknown parameter: '
                    '{}. Please use following ones: '
                    '{}'.format(key, available_keys))

        return event_name, event_value, event_type, umask, config, config1

    @staticmethod
    def _get_event_if_known(event):
        """Look up a predefined uncore event by the name part of ``event``.

        Only the text before the first ``/`` is used; for a known name the rest of the
        configuration is ignored even if provided by the user.

        Returns:
            ``(event, pmu_type)`` for a name found in ``UNCORE_IMC_EVENTS`` or
            ``UNCORE_UPI_EVENTS``, otherwise ``(None, '')``.
        """
        assert len(event) > 0
        metric_name = event.partition('/')[0]

        known_tables = (
            (UNCORE_IMC_EVENTS, 'uncore_imc'),
            (UNCORE_UPI_EVENTS, 'uncore_upi'),
        )
        for table, pmu_type in known_tables:
            if metric_name in table:
                return table[metric_name], pmu_type

        return None, ''

    @staticmethod
    def _get_unknown_event(event_name, event_value, umask, config, config1):
        """Build an ``Event`` for a user defined uncore metric and register its metadata.

        The metric is registered through ``add_metric`` under ``event_name`` as a
        platform level gauge with ``socket`` and ``pmu_type`` levels.

        Returns:
            The newly created ``Event``.
        """
        user_event = Event(
            name=event_name,
            event=event_value,
            umask=umask,
            config=config,
            config1=config1,
        )
        user_metric_metadata = MetricMetadata(
            'Uncore metric provided by user',
            MetricType.GAUGE,
            MetricUnit.NUMERIC,
            MetricSource.PERF_SUBSYSTEM_UNCORE,
            MetricGranularity.PLATFORM,
            ['socket', 'pmu_type'],
            'yes',
        )
        add_metric(event_name, user_metric_metadata)
        return user_event

    def _prepare_events(self, uncore_events):
        imc_events = []
        upi_events = []
        cha_events = []
        for event in uncore_events:
            e, event_type = self._get_event_if_known(event)
            if not e:
                event_name, event_value, event_type, umask, config, config1 = \
                    self._parse_uncore_event_input(event)
                e = self._get_unknown_event(event_name, event_value, umask,
                                            config, config1)
            if event_type == 'uncore_imc':
                imc_events.append(e)
            elif event_type == 'uncore_cha':
                cha_events.append(e)
            elif event_type == 'uncore_upi':
                upi_events.append(e)
        return imc_events, cha_events, upi_events

    def _init_uncore_pmu_events(self, enable_derived_metrics, uncore_events,
                                platform: platforms.Platform):
        """Create uncore perf counters for the configured uncore event groups.

        ``uncore_events`` is either a flat list of event definitions (treated as a single
        group) or a list of such lists. For every group one ``UncorePerfCounters`` object
        is appended to ``self._uncore_pmu`` and its measurement getter (wrapped with
        ``UncoreDerivedMetricsGenerator`` when ``enable_derived_metrics`` is set) is
        appended to ``self._uncore_get_measurements``. Both lists are left empty when no
        uncore events are configured.

        Raises:
            PMUNotAvailable: re-raised after logging when PMU discovery fails.
        """
        self._uncore_pmu = []
        self._uncore_get_measurements = []
        if len(uncore_events) == 0:
            return

        # A flat list of strings is a single group of events.
        if isinstance(uncore_events[0], str):
            uncore_events = [uncore_events]

        for event_group in uncore_events:
            imc_events, cha_events, upi_events = self._prepare_events(event_group)
            pmu_events = {}
            cpus = set()
            try:
                # Cpus and events for perf uncore imc, upi and cha.
                # CPUs of every PMU type are collected, including CHA, so that a group
                # with CHA events only is not left without CPUs.
                for events, pmu_prefix in ((imc_events, 'uncore_imc_'),
                                           (upi_events, 'uncore_upi_'),
                                           (cha_events, 'uncore_cha_')):
                    pmu_cpus, discovered_events = _discover_pmu_uncore_config(
                        events, pmu_prefix)
                    pmu_events.update(discovered_events)
                    cpus.update(pmu_cpus)
            except PMUNotAvailable:
                log.error('PMU metrics requested but PMU not available!')
                raise

            # Prepare uncore object
            uncore_pmu = UncorePerfCounters(
                cpus=list(cpus),
                pmu_events=pmu_events,
                platform=platform)
            self._uncore_pmu.append(uncore_pmu)

            # Wrap with derived..
            if enable_derived_metrics:
                derived_metrics_generator = UncoreDerivedMetricsGenerator(
                    uncore_pmu.get_measurements)
                self._uncore_get_measurements.append(derived_metrics_generator.get_measurements)
            else:
                self._uncore_get_measurements.append(uncore_pmu.get_measurements)

    def _iterate(self):
        """Run a single measurement iteration.

        Steps:
        - discover tasks on the node and attach additional task labels,
        - synchronize internally managed containers with the discovered tasks,
        - gather extra platform measurements (uncore, bandwidth, zoneinfo, vmstat),
        - collect platform information, metrics and labels,
        - prepare tasks data and call the optional iterate body callback,
        - wait for the end of the interval and send all metrics to the storage.

        When tasks or their measurements cannot be synchronized, the error is logged and
        the iteration is skipped (after waiting for the end of the interval).
        """
        iteration_start = time.time()

        # Get information about tasks.
        try:
            tasks = self._node.get_tasks()
        except TaskSynchronizationException as e:
            log.error('Cannot synchronize tasks with node (error=%s) - skip this iteration!', e)
            self._wait()
            return

        append_additional_labels_to_tasks(self._task_label_generators, tasks)
        log.debug('Tasks detected: %d', len(tasks))

        # Keep sync of found tasks and internally managed containers.
        containers = self._containers_manager.sync_containers_state(tasks)
        log.log(TRACE, 'Tasks container mapping:\n%s', '\n'.join(
            ['%s(%s)  =  %s' % (task.name, task.task_id, container._cgroup_path) for task, container
             in containers.items()]))

        # @TODO why not in platform module?
        extra_platform_measurements = {}
        for uncore_get_measurements in self._uncore_get_measurements:
            extra_platform_measurements.update(uncore_get_measurements())
        # Bandwidth is read only once and reused in the following iterations.
        if self._cached_bandwidth is None:
            self._cached_bandwidth = get_bandwidth()
        extra_platform_measurements.update(self._cached_bandwidth)

        # Zoneinfo from /proc/zoneinfo
        if self._zoneinfo:
            extra_platform_measurements.update(
                zoneinfo_module.get_zoneinfo_measurements(self._zoneinfo_regexp_compiled))

        # vmstate from /proc/vmstat and /sys/devices/system/node
        if self._vmstat:
            # A boolean means "collect everything", otherwise it is a compiled regexp.
            _vmstat_regexp = None if self._vmstat in (True, False) else self._vmstat
            extra_platform_measurements.update(
                vmstats.parse_node_vmstat_keys(_vmstat_regexp))
            extra_platform_measurements.update(
                vmstats.parse_proc_vmstat_keys(_vmstat_regexp))

        # Platform information
        # Honour the ``include_optional_labels`` constructor option instead of
        # unconditionally disabling the optional labels.
        platform, platform_metrics, platform_labels = platforms.collect_platform_information(
            self._rdt_enabled, self._gather_hw_mm_topology,
            extra_platform_measurements=extra_platform_measurements,
            include_optional_labels=self._include_optional_labels,
        )

        # Common labels
        common_labels = dict(platform_labels, **self._extra_labels)

        try:
            tasks_data = _prepare_tasks_data(containers)
        except MissingMeasurementException as e:
            log.error('Cannot synchronize tasks measurements (error=%s) - skip this iteration!', e)
            self._wait()
            return

        # Inject other runners code.
        if self._iterate_body_callback is not None:
            self._iterate_body_callback(containers, platform, tasks_data, common_labels)

        self._wait()

        # The registered duration includes the time spent in _wait above.
        iteration_duration = time.time() - iteration_start
        profiling.profiler.register_duration('iteration', iteration_duration)

        # Generic metrics.
        metrics_package = MetricPackage(self._metrics_storage)
        metrics_package.add_metrics(_get_internal_metrics(tasks))
        metrics_package.add_metrics(platform_metrics)
        metrics_package.add_metrics(_build_tasks_metrics(tasks_data))
        metrics_package.add_metrics(profiling.profiler.get_metrics())
        metrics_package.add_metrics(get_logging_metrics())
        metrics_package.send(common_labels)

    def run(self) -> int:
        """Loop that gathers platform and tasks metrics and calls _iterate_body.
        _iterate_body is a method to be subclassed.
        """
        error_code = self._initialize()
        if error_code is not None:
            return error_code

        while True:
            self._iterate()

            if self._finish:
                break

        # Cleanup phase.
        self._containers_manager.cleanup()
        return 0
Пример #7
0
    def _initialize(self) -> Optional[int]:
        """Check RDT availability, privileges and prepare internal state.

        Flow:
        - conclude requirements based on configuration,
        - conclude required features based on auto discovery,
        - confront user expectations from configuration file with resctrl fs and
          security access,
        - check RDT HW monitoring features availability,
        - create the containers manager and the uncore PMU readers.

        Returns:
            ``None`` when the runner is ready to iterate, or the error code ``1`` that
            should stop the runner.
        """
        resctrl_available = resctrl.check_resctrl()
        # If enabled explicitly check resctrl availability right now.
        if self._rdt_enabled is True and not resctrl_available:
            log.error('RDT explicitly enabled but resctrl fs not available - exiting!')
            return 1

        # Auto discovery Intel RDT features.
        rdt_auto_enabling = self._rdt_enabled is None
        if rdt_auto_enabling:
            # Assume yes temporary - but will check monitoring/access later.
            log.debug('Enable RDT auto discovery (resctrl availability=%s)', resctrl_available)
            self._rdt_enabled = resctrl_available

        log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
        platform, _, _ = platforms.collect_platform_information(
            resctrl_available,
            gather_hw_mm_topology=self._gather_hw_mm_topology
        )

        # Confront RDT (resctrl fs) with HW enabled monitoring features.
        if self._rdt_enabled and not platform.rdt_information.is_monitoring_enabled():
            # Note: WCA does not support RDT without monitoring (keeps a mapping of
            # cgroups and resctrl groups).
            # The sentences are separated by a space so the message stays readable.
            msg = ('Resctrl is available but RDT monitoring features are not! '
                   'Please enable CMT or MBM with kernel parameters (monitoring is '
                   'required for CAT or MBA allocation)!')
            if not rdt_auto_enabling:
                # RDT was forced by configuration - fail fast here.
                log.error(msg)
                return 1
            log.debug(msg)
            self._rdt_enabled = False
            # Override: rdt information should not be available later
            # (e.g. in ContainerManager).
            platform.rdt_information = None

        # All RDT checks (security/check) done - show info and call initialization callback.
        log.info('RDT: %s %s', 'enabled' if self._rdt_enabled else 'disabled',
                 ' (auto discovery)' if rdt_auto_enabling else '',
                 )

        # Event names (perf cgroups)
        self._event_names = filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        log.info('Enabling %i perf events (for cgroups).', len(self._event_names))
        log.debug('Enabling perf events: %s', ', '.join(self._event_names))
        # Check and assume most popular number of available number of HW counters.
        if self._event_names and not check_perf_event_count_limit(
                self._event_names, platform.cpus, platform.cores):
            return 1

        # _allocation_configuration is set in allocation mode (AllocationRunner)
        # so we need access to write in cgroups.
        write_to_cgroup = self._allocation_configuration is not None
        use_perf = len(self._event_names) > 0
        # Check we have enough access.
        if not security.are_privileges_sufficient(write_to_cgroup, self._rdt_enabled, use_perf):
            return 1

        # Resctrl is enabled and available, call a placeholder to allow further initialization.
        # For "measurement mode" it's nothing to configure in RDT.
        # The callback is only set when "MeasurementRunner" is used as a component
        # of other runners e.g. Allocation.
        if self._rdt_enabled and self._initialize_rdt_callback is not None:
            if not self._initialize_rdt_callback():
                return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
            wss_reset_cycles=self._wss_reset_cycles,
            wss_stable_cycles=self._wss_stable_cycles,
            wss_membw_threshold=self._wss_membw_threshold,
            perf_aggregate_cpus=self._perf_aggregate_cpus,
            interval=self._interval,
            sched=self._sched,
        )
        log.log(TRACE, 'container manager config: %s', self._containers_manager.__dict__)

        self._init_uncore_pmu_events(self._enable_derived_metrics, self._uncore_events, platform)

        return None
Пример #8
0
class MeasurementRunner(Runner):
    """MeasurementRunner runs iterations to collect platform, resource and task
    measurements and stores them in the metrics_storage component.

    Arguments:
        node: component used for tasks discovery
        metrics_storage: storage to store platform, internal, resource and task metrics
            (defaults to DEFAULT_STORAGE/LogStorage to output for standard error)
        action_delay: iteration duration in seconds
            (defaults to 1 second)
            NOTE(review): earlier documentation stated that None disables waiting,
            but _wait() subtracts from this value, so None does not look supported
            by this class - confirm.
        rdt_enabled: enables or disables support for RDT monitoring
            (defaults to None(auto) based on platform capabilities)
        extra_labels: additional labels attached to every metric
            (defaults to empty dict)
        event_names: perf counters to monitor
            (defaults to instructions, cycles, cache-misses, memstalls)
        enable_derived_metrics: enable derived metrics ips, ipc and cache_hit_ratio
            (based on enabled_event names), default to False
        task_label_generators: components generating additional labels for tasks;
            the mapping is copied, so the caller's dict is never modified
    """
    def __init__(
        self,
        node: nodes.Node,
        metrics_storage: storage.Storage = DEFAULT_STORAGE,
        action_delay: Numeric(0, 60) = 1.,  # [s]
        rdt_enabled: Optional[bool] = None,  # Defaults(None) - auto configuration.
        extra_labels: Dict[Str, Str] = None,
        event_names: List[str] = DEFAULT_EVENTS,
        enable_derived_metrics: bool = False,
        task_label_generators: Dict[str, TaskLabelGenerator] = None,
        _allocation_configuration: Optional[AllocationConfiguration] = None,
    ):

        self._node = node
        self._metrics_storage = metrics_storage
        self._action_delay = action_delay
        self._rdt_enabled = rdt_enabled
        # Disabled by default, to be overridden by subclasses.
        self._rdt_mb_control_required = False
        # Disabled by default, to be overridden by subclasses.
        self._rdt_cache_control_required = False
        self._extra_labels = extra_labels or dict()
        self._finish = False  # Guard to stop iterations.
        self._last_iteration = time.time()  # Used internally by wait function.
        self._allocation_configuration = _allocation_configuration
        self._event_names = event_names

        self._enable_derived_metrics = enable_derived_metrics

        if task_label_generators is None:
            # Default value for task_labels_generator.
            self._task_label_generators = {
                'application':
                TaskLabelRegexGenerator('$', '', 'task_name'),
                'application_version_name':
                TaskLabelRegexGenerator('.*$', '', 'task_name'),
            }
        else:
            # Work on a shallow copy: an extra generator is inserted below and
            # that must not leak into the dict owned by the caller.
            self._task_label_generators = dict(task_label_generators)
        # Generate label value with cpu initial assignment, to simplify
        #   management of distributed model system for plugin:
        #   https://github.com/intel/platform-resource-manager/tree/master/prm
        #
        # To not risk subtle bugs in 1.0.x do not add it to _task_label_generators as default,
        #   but make it hardcoded here and possible to be removed.
        self._task_label_generators['initial_task_cpu_assignment'] = \
            TaskLabelResourceGenerator('cpus')

    @profiler.profile_duration(name='sleep')
    def _wait(self):
        """Sleep for the part of `action_delay` not yet consumed by the iteration.

        The time already spent since the previous wait finished is subtracted
        from the configured delay; if the iteration took longer than the delay,
        no sleep happens at all.
        """
        now = time.time()
        iteration_duration = now - self._last_iteration

        residual_time = max(0., self._action_delay - iteration_duration)
        time.sleep(residual_time)
        self._last_iteration = time.time()

    def _initialize(self) -> Optional[int]:
        """Check privileges, RDT availability and prepare internal state.

        Returns:
            None on success, or 1 as an error code that should stop the Runner.
        """
        if not security.are_privileges_sufficient():
            log.error(
                "Insufficient privileges! "
                "Impossible to use perf_event_open/resctrl subsystems. "
                "For unprivileged user it is needed to: "
                "adjust /proc/sys/kernel/perf_event_paranoid (set to -1), "
                "has CAP_DAC_OVERRIDE and CAP_SETUID capabilities and "
                "SECBIT_NO_SETUID_FIXUP secure bit set.")
            return 1

        # Initialization (auto discovery Intel RDT features).
        rdt_available = resctrl.check_resctrl()
        if self._rdt_enabled is None:
            self._rdt_enabled = rdt_available
            log.info('RDT enabled (auto configuration): %s', self._rdt_enabled)
        elif self._rdt_enabled is True and not rdt_available:
            log.error('RDT explicitly enabled but not available - exiting!')
            return 1

        if self._rdt_enabled:
            # Resctrl is enabled and available, call a placeholder to allow further initialization.
            rdt_initialization_ok = self._initialize_rdt()
            if not rdt_initialization_ok:
                return 1

        # Postpone the container manager initialization after rdt checks were performed.
        # The result is not used in this method; the call itself is kept so the
        # sequence of platform queries during initialization stays as it was.
        platforms.collect_topology_information()

        platform, _, _ = platforms.collect_platform_information(
            self._rdt_enabled)
        rdt_information = platform.rdt_information

        self._event_names = _filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        # We currently do not support RDT without monitoring.
        if self._rdt_enabled and not rdt_information.is_monitoring_enabled():
            log.error('RDT monitoring is required - please enable CAT '
                      'or MBM with kernel parameters!')
            return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
        )
        return None

    def _iterate(self):
        """Run a single iteration: discover tasks, measure, call the body, send metrics.

        When tasks cannot be synchronized with the node the iteration is skipped
        (after waiting) and no metrics are sent.
        """
        iteration_start = time.time()

        # Get information about tasks.
        try:
            tasks = self._node.get_tasks()
        except TaskSynchronizationException as e:
            log.error(
                'Cannot synchronize tasks with node (error=%s) - skip this iteration!',
                e)
            self._wait()
            return

        append_additional_labels_to_tasks(self._task_label_generators, tasks)
        log.debug('Tasks detected: %d', len(tasks))

        # Keep sync of found tasks and internally managed containers.
        containers = self._containers_manager.sync_containers_state(tasks)

        # Platform information
        platform, platform_metrics, platform_labels = platforms.collect_platform_information(
            self._rdt_enabled)

        # Common labels (extra labels override platform labels on key conflict).
        common_labels = dict(platform_labels, **self._extra_labels)

        # Tasks data
        tasks_measurements, tasks_resources, tasks_labels = _prepare_tasks_data(
            containers)
        tasks_metrics = _build_tasks_metrics(tasks_labels, tasks_measurements)

        self._iterate_body(containers, platform, tasks_measurements,
                           tasks_resources, tasks_labels, common_labels)

        self._wait()

        # Measured after the wait, so the registered duration includes the sleep.
        iteration_duration = time.time() - iteration_start
        profiling.profiler.register_duration('iteration', iteration_duration)

        # Generic metrics.
        metrics_package = MetricPackage(self._metrics_storage)
        metrics_package.add_metrics(_get_internal_metrics(tasks))
        metrics_package.add_metrics(platform_metrics)
        metrics_package.add_metrics(tasks_metrics)
        metrics_package.add_metrics(profiling.profiler.get_metrics())
        metrics_package.add_metrics(get_logging_metrics())
        metrics_package.send(common_labels)

    def run(self) -> int:
        """Loop that gathers platform and tasks metrics and calls _iterate_body.
        _iterate_body is a method to be subclassed.

        Returns:
            The error code from _initialize() if initialization failed,
            otherwise 0 once the loop was stopped via the `_finish` flag.
        """
        error_code = self._initialize()
        if error_code is not None:
            return error_code

        # At least one iteration is always executed; the stop flag is checked
        # only after an iteration completes.
        while True:
            self._iterate()

            if self._finish:
                break

        # Cleanup phase.
        self._containers_manager.cleanup()
        return 0

    def _iterate_body(self, containers, platform, tasks_measurements,
                      tasks_resources, tasks_labels, common_labels):
        """No-op implementation of inner loop body - called by iterate"""

    def _initialize_rdt(self) -> bool:
        """Nothing to configure in RDT to measure resource usage.
        Returns state of rdt initialization (True ok, False for error)
        """
        return True
class MeasurementRunner(Runner):
    """rst

    MeasurementRunner run iterations to collect platform, resource, task measurements
    and store them in metrics_storage component.

    - `node`: **type**:

        Component used for tasks discovery.

    - ``metrics_storage``: **type** = `DEFAULT_STORAGE`

        Storage to store platform, internal, resource and task metrics.
        (defaults to DEFAULT_STORAGE/LogStorage to output for standard error)

    - ``interval``: **Numeric(0,60)** = *1.*

        Iteration duration in seconds (None disables wait and iterations).
        (defaults to 1 second)

    - ``rdt_enabled``: **Optional[bool]** = *None*

        Enables or disables support for RDT monitoring.
        (defaults to None(auto) based on platform capabilities)

    - ``gather_hw_mm_topology``: **Optional[bool]** = *None*

        Gather hardware/memory topology based on dmidecode and ipmctl.
        (defaults to None(auto) based on dmidecode and ipmctl binaries availability)

    - ``extra_labels``: **Optional[Dict[Str, Str]]** = *None*

        Additional labels attached to every metric.
        (defaults to empty dict)

    - ``event_names``: **List[str]** = `[]`

        Perf counters to monitor.
        (defaults to not collect perf counters - empty list of events)

    - ``perf_aggregate_cpus``: **bool** = `True`

        Should perf events collected for cgroups be aggregated (sum) by CPUs.
        (defaults to true, to limit number of exposed metrics)

    - ``enable_derived_metrics``: **bool** = *False*

        Enable derived metrics ips, ipc and cache_hit_ratio.
        (based on enabled_event names, default to False)

    - ``uncore_event_names``: **List[str]** = `[]`

        Enable perf event uncore metrics.
        (defaults to an empty list - no uncore events are collected)

    - ``task_label_generators``: **Optional[Dict[str, TaskLabelGenerator]]** = *None*

        Component to generate additional labels for tasks.
        (optional)

    - ``allocation_configuration``: **Optional[AllocationConfiguration]** = *None*

        Allows fine grained control over allocations.
        (defaults to AllocationConfiguration() instance)

    - ``wss_reset_interval``: **int** = *0*

        Interval of resetting WSS (WorkingSetSize).
        (defaults to 0, which means that metric is not collected, e.g. when set to 1
        ``clear_refs`` will be reset every measurement iteration defined by ``interval`` option.)

    - ``include_optional_labels``: **bool** = *False*

        Attach following labels to all metrics:
        `sockets`, `cores`, `cpus`, `cpu_model`, `cpu_model_number` and `wca_version`
        (defaults to False)
    """
    def __init__(
            self,
            node: Node,
            metrics_storage: Storage = DEFAULT_STORAGE,
            interval: Numeric(0, 60) = 1.,
            rdt_enabled: Optional[bool] = None,
            gather_hw_mm_topology: Optional[bool] = None,
            extra_labels: Optional[Dict[Str, Str]] = None,
            event_names: List[str] = [],
            perf_aggregate_cpus: bool = True,
            enable_derived_metrics: bool = False,
            uncore_event_names: List[str] = [],
            task_label_generators: Optional[Dict[str,
                                                 TaskLabelGenerator]] = None,
            allocation_configuration: Optional[AllocationConfiguration] = None,
            wss_reset_interval: int = 0,
            include_optional_labels: bool = False):
        """Store the configuration and register metadata for dynamic perf metrics.

        Besides remembering its arguments, the constructor updates the
        module-level METRICS_METADATA: it adds an entry for every raw event
        name (one containing '__r') and, when `perf_aggregate_cpus` is false,
        switches perf/derived cgroup metrics to the "cpu" level.
        """
        # Plain configuration values.
        self._node = node
        self._metrics_storage = metrics_storage
        self._interval = interval
        self._rdt_enabled = rdt_enabled
        self._gather_hw_mm_topology = gather_hw_mm_topology
        self._include_optional_labels = include_optional_labels

        # Label values are always kept as strings.
        if extra_labels:
            self._extra_labels = {
                label: str(value) for label, value in extra_labels.items()
            }
        else:
            self._extra_labels = dict()
        log.debug('Extra labels: %r', self._extra_labels)

        self._finish = False  # Guard to stop iterations.
        self._last_iteration = time.time()  # Used internally by wait function.
        self._allocation_configuration = allocation_configuration
        self._event_names = event_names
        self._perf_aggregate_cpus = perf_aggregate_cpus

        # TODO: fix those workarounds for dynamic levels and dynamic perf event metrics.
        # Step 1: create metadata for dynamic (raw) events.
        for event_name in event_names:
            if '__r' not in event_name:
                # Not a dynamic raw event - nothing to register.
                continue
            log.debug('Creating metadata for dynamic metric: %r', event_name)
            METRICS_METADATA[event_name] = MetricMetadata(
                'Hardware PMU counter (raw event)',
                MetricType.COUNTER,
                MetricUnit.NUMERIC,
                MetricSource.PERF_SUBSYSTEM_WITH_CGROUPS,
                MetricGranularity.TASK,
                [],
                'no (event_names)',
            )

        # Step 2: adjust levels of all perf related metrics according to
        # the perf_aggregate_cpus setting.
        if not perf_aggregate_cpus:
            log.debug(
                'Enabling "cpu" level for PERF_SUBSYSTEM_WITH_CGROUPS and derived metrics.'
            )
            per_cpu_sources = (
                MetricSource.PERF_SUBSYSTEM_WITH_CGROUPS,
                MetricSource.DERIVED_PERF_WITH_CGROUPS,
            )
            for metadata in METRICS_METADATA.values():
                if metadata.source in per_cpu_sources:
                    metadata.levels = ['cpu']

        self._enable_derived_metrics = enable_derived_metrics
        self._uncore_event_names = uncore_event_names

        self._task_label_generators = task_label_generators or {}

        self._wss_reset_interval = wss_reset_interval

        # Filled in later by _init_uncore_pmu().
        self._uncore_pmu = None

        # Hooks that other runners may install through the _set_*_callback methods.
        self._initialize_rdt_callback = None
        self._iterate_body_callback = None

    def _set_initialize_rdt_callback(self, func):
        """Install the hook that _initialize() invokes when RDT is enabled.

        The hook is called without arguments; a falsy return value makes
        _initialize() fail with error code 1.
        """
        self._initialize_rdt_callback = func

    def _set_iterate_body_callback(self, func):
        """Install the hook that _iterate() invokes on every iteration.

        The hook is called as func(containers, platform, tasks_data, common_labels).
        """
        self._iterate_body_callback = func

    @profiler.profile_duration(name='sleep')
    def _wait(self):
        """Sleep for whatever is left of the configured interval.

        The time elapsed since the previous wait finished is subtracted from
        `interval`; a negative remainder is clamped to zero so an overlong
        iteration results in no sleep.
        """
        elapsed = time.time() - self._last_iteration
        time.sleep(max(0., self._interval - elapsed))
        # Reference point for the next call.
        self._last_iteration = time.time()

    def _initialize(self) -> Optional[int]:
        """Check RDT availability, privileges and prepare internal state.

        Flow:
        - conclude requirements based on configuration,
        - conclude required features based on auto discovery,
        - confront user expectations from the configuration file with resctrl fs
          and security access,
        - check RDT HW monitoring features availability.

        Returns:
            None on success, or 1 as an error code that should stop the Runner.

        Raises:
            Whatever _init_uncore_pmu() raises for unknown or unavailable
            uncore events.
        """
        resctrl_available = resctrl.check_resctrl()
        # If enabled explicitly check resctrl availability right now.
        if self._rdt_enabled is True and not resctrl_available:
            log.error(
                'RDT explicitly enabled but resctrl fs not available - exiting!'
            )
            return 1

        # Auto discovery Intel RDT features.
        if self._rdt_enabled is None:
            # Assume yes temporary - but will check monitoring/access later.
            log.debug('Enable RDT auto discovery (resctrl availability=%s)',
                      resctrl_available)
            self._rdt_enabled = resctrl_available
            rdt_auto_enabling = True
        else:
            rdt_auto_enabling = False

        log.debug('gather_hw_mm_topology: %s', self._gather_hw_mm_topology)
        # NOTE(review): platform information is collected based on resctrl
        # availability rather than self._rdt_enabled, so RDT information may be
        # gathered even when RDT was explicitly disabled - confirm this is intended.
        platform, _, _ = platforms.collect_platform_information(
            resctrl_available,
            gather_hw_mm_topology=self._gather_hw_mm_topology)

        # Confront RDT (resctrl fs) with HW enabled monitoring features.
        if self._rdt_enabled and not platform.rdt_information.is_monitoring_enabled(
        ):
            # Note: WCA does not support RDT without monitoring (keeps a mapping of
            # cgroups and resctrl groups).
            msg = (
                'Resctrl is available but RDT monitoring features are not! '
                'Please enable CMT or MBM with kernel parameters (monitoring is '
                'required for CAT or MBA allocation)!')
            if rdt_auto_enabling:
                # Auto discovery: silently fall back to running without RDT.
                log.debug(msg)
                self._rdt_enabled = False
                # Override: rdt information should not be available later
                # e.g. in ContainerManager.
                platform.rdt_information = None
            else:
                # If RDT was forced, fail fast here.
                log.error(msg)
                return 1

        # All RDT checks (security/check) done - show info and call initialization callback.
        log.info(
            'RDT: %s %s',
            'enabled' if self._rdt_enabled else 'disabled',
            ' (auto discovery)' if rdt_auto_enabling else '',
        )

        # Event names (perf cgroups)
        self._event_names = filter_out_event_names_for_cpu(
            self._event_names, platform.cpu_codename)

        log.info('Enabling %i perf events (for cgroups).',
                 len(self._event_names))
        log.debug('Enabling perf events: %s', ', '.join(self._event_names))
        # Check the number of requested events against the assumed (most
        # popular) number of available HW counters.
        if self._event_names:
            if not check_perf_event_count_limit(self._event_names,
                                                platform.cpus, platform.cores):
                return 1

        # _allocation_configuration is set in allocation mode (AllocationRunner)
        # so we need access to write in cgroups.
        write_to_cgroup = self._allocation_configuration is not None
        use_perf = len(self._event_names) > 0
        # Check we have enough access.
        if not security.are_privileges_sufficient(write_to_cgroup,
                                                  self._rdt_enabled, use_perf):
            return 1

        # Resctrl is enabled and available, call a placeholder to allow further initialization.
        # For "measurement mode" there is nothing to configure in RDT.
        # A callback is only installed when "MeasurementRunner" is used as a
        # component of other runners, e.g. Allocation.
        if self._rdt_enabled:
            if self._initialize_rdt_callback is not None:
                rdt_initialization_ok = self._initialize_rdt_callback()

                if not rdt_initialization_ok:
                    return 1

        self._containers_manager = ContainerManager(
            platform=platform,
            allocation_configuration=self._allocation_configuration,
            event_names=self._event_names,
            enable_derived_metrics=self._enable_derived_metrics,
            wss_reset_interval=self._wss_reset_interval,
            perf_aggregate_cpus=self._perf_aggregate_cpus)

        self._init_uncore_pmu(self._enable_derived_metrics,
                              self._uncore_event_names, platform)

        return None

    def _init_uncore_pmu(self, enable_derived_metrics, uncore_event_names,
                         platform: platforms.Platform):
        """Prepare uncore perf counters and the matching measurements getter.

        With an empty `uncore_event_names` the uncore PMU stays unset and the
        getter returns a fresh empty dict. Otherwise the names are split into
        IMC and UPI events, the PMU configuration is discovered and
        `self._uncore_get_measurements` is bound either to the raw counters or
        to the derived-metrics wrapper around them.

        Raises:
            Exception: when an event name is neither an IMC nor a UPI event.
            PMUNotAvailable: re-raised (after logging) when discovery fails.
        """
        uncore_requested = len(uncore_event_names) > 0

        # State used when no uncore events are requested.
        self._uncore_pmu = None
        self._uncore_get_measurements = lambda: {}
        if not uncore_requested:
            return

        # Split requested names into IMC and UPI event definitions.
        collected_pmu_events = {}
        imc, upi = [], []
        for name in uncore_event_names:
            if name in UNCORE_IMC_EVENTS:
                imc.append(UNCORE_IMC_EVENTS[name])
                continue
            if name in UNCORE_UPI_EVENTS:
                upi.append(UNCORE_UPI_EVENTS[name])
                continue
            raise Exception('Unknown event name: {}'.format(name))

        try:
            # Cpus and events for perf uncore imc
            imc_cpus, imc_pmu_events = _discover_pmu_uncore_config(
                imc, 'uncore_imc_')
            collected_pmu_events.update(imc_pmu_events)
            # Cpus and events for perf uncore upi
            upi_cpus, upi_pmu_events = _discover_pmu_uncore_config(
                upi, 'uncore_upi_')
            collected_pmu_events.update(upi_pmu_events)
            # De-duplicate cpus shared by both PMU kinds.
            unique_cpus = list(set(imc_cpus + upi_cpus))
        except PMUNotAvailable:
            log.error('PMU metrics requested but PMU not available!')
            raise

        # Prepare uncore object
        self._uncore_pmu = UncorePerfCounters(
            cpus=unique_cpus,
            pmu_events=collected_pmu_events,
            platform=platform,
        )

        if not enable_derived_metrics:
            self._uncore_get_measurements = self._uncore_pmu.get_measurements
            return

        # Wrap the raw getter with the derived metrics generator.
        self._uncore_derived_metrics = UncoreDerivedMetricsGenerator(
            self._uncore_pmu.get_measurements)
        self._uncore_get_measurements = self._uncore_derived_metrics.get_measurements

    def _iterate(self):
        """Run a single iteration: discover tasks, measure, call the hook, send metrics.

        The iteration is skipped (after waiting, without sending metrics) when
        tasks cannot be synchronized with the node or when task measurements
        are missing.
        """
        iteration_start = time.time()

        # Get information about tasks.
        try:
            tasks = self._node.get_tasks()
        except TaskSynchronizationException as e:
            log.error(
                'Cannot synchronize tasks with node (error=%s) - skip this iteration!',
                e)
            self._wait()
            return

        append_additional_labels_to_tasks(self._task_label_generators, tasks)
        log.debug('Tasks detected: %d', len(tasks))

        # Keep sync of found tasks and internally managed containers.
        containers = self._containers_manager.sync_containers_state(tasks)
        log.log(
            TRACE, 'Tasks container mapping:\n%s', '\n'.join([
                '%s(%s)  =  %s' %
                (task.name, task.task_id, container._cgroup_path)
                for task, container in containers.items()
            ]))

        # @TODO why not in platform module?
        extra_platform_measurements = self._uncore_get_measurements()
        extra_platform_measurements.update(get_bandwidth())

        # Platform information.
        # Honor the constructor's `include_optional_labels` option instead of
        # a hard-coded False, so the documented setting actually takes effect.
        platform, platform_metrics, platform_labels = platforms.collect_platform_information(
            self._rdt_enabled,
            self._gather_hw_mm_topology,
            extra_platform_measurements=extra_platform_measurements,
            include_optional_labels=self._include_optional_labels,
        )

        # Common labels (extra labels override platform labels on key conflict).
        common_labels = dict(platform_labels, **self._extra_labels)

        try:
            tasks_data = _prepare_tasks_data(containers)
        except MissingMeasurementException as e:
            log.error(
                'Cannot synchronize tasks measurements (error=%s) - skip this iteration!',
                e)
            self._wait()
            return

        # Inject other runners code.
        if self._iterate_body_callback is not None:
            self._iterate_body_callback(containers, platform, tasks_data,
                                        common_labels)

        self._wait()

        # Measured after the wait, so the registered duration includes the sleep.
        iteration_duration = time.time() - iteration_start
        profiling.profiler.register_duration('iteration', iteration_duration)

        # Generic metrics.
        metrics_package = MetricPackage(self._metrics_storage)
        metrics_package.add_metrics(_get_internal_metrics(tasks))
        metrics_package.add_metrics(platform_metrics)
        metrics_package.add_metrics(_build_tasks_metrics(tasks_data))
        metrics_package.add_metrics(profiling.profiler.get_metrics())
        metrics_package.add_metrics(get_logging_metrics())
        metrics_package.send(common_labels)

    def run(self) -> int:
        """Initialize the runner and iterate until asked to finish.

        Each iteration gathers platform and tasks metrics in _iterate(), which
        also invokes the iterate-body callback when one is installed.

        Returns:
            The error code from _initialize() if it failed, otherwise 0 after
            the loop was stopped and containers were cleaned up.
        """
        init_error = self._initialize()
        if init_error is not None:
            return init_error

        # Always run at least one iteration; the stop flag is consulted only
        # after an iteration has completed.
        keep_running = True
        while keep_running:
            self._iterate()
            keep_running = not self._finish

        # Cleanup phase.
        self._containers_manager.cleanup()
        return 0