def __init__(self,
             measurement_runner: MeasurementRunner,
             allocator: Allocator,
             allocations_storage: Storage = DEFAULT_STORAGE,
             anomalies_storage: Storage = DEFAULT_STORAGE,
             rdt_mb_control_required: bool = False,
             rdt_cache_control_required: bool = False,
             remove_all_resctrl_groups: bool = False):

    if not measurement_runner._allocation_configuration:
        measurement_runner._allocation_configuration = AllocationConfiguration()

    self._measurement_runner = measurement_runner

    # Allocation specific.
    self._allocator = allocator
    self._allocations_storage = allocations_storage
    self._rdt_mb_control_required = rdt_mb_control_required
    self._rdt_cache_control_required = rdt_cache_control_required

    # Anomaly.
    self._anomalies_storage = anomalies_storage
    self._anomalies_statistics = AnomalyStatistics()

    # Internal allocation statistics.
    self._allocations_counter = 0
    self._allocations_errors = 0

    self._remove_all_resctrl_groups = remove_all_resctrl_groups

    self._measurement_runner._set_iterate_body_callback(self._iterate_body)
    self._measurement_runner._set_initialize_rdt_callback(self._initialize_rdt)
def test_measurements_wait(sleep_mock):
    with patch('time.time', return_value=1):
        runner = MeasurementRunner(
            node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[])),
            metrics_storage=Mock(spec=storage.Storage, store=Mock()),
            rdt_enabled=False,
            extra_labels={})
        runner._initialize()
        runner._iterate()
        sleep_mock.assert_called_once_with(1.0)

    with patch('time.time', return_value=1.3):
        runner._iterate()
        sleep_mock.assert_called_with(0.7)
        assert runner._last_iteration == 1.3

    with patch('time.time', return_value=2.5):
        runner._iterate()
        sleep_mock.assert_called_with(0)
def test_measurements_runner_init_and_checks(rdt_enabled, resctrl_available,
                                             monitoring_available, access_ok, ok):
    # auto rdt
    runner = MeasurementRunner(
        node=Mock(spec=MesosNode),
        metrics_storage=Mock(spec=storage.Storage),
        rdt_enabled=rdt_enabled,
    )
    platform_mock = Mock(rdt_information=Mock(
        is_monitoring_enabled=Mock(return_value=monitoring_available)))

    with patch('wca.resctrl.check_resctrl', return_value=resctrl_available), \
            patch('wca.security.are_privileges_sufficient', return_value=access_ok), \
            patch('wca.platforms.collect_platform_information',
                  return_value=(platform_mock, None, None)):
        if ok:
            # ok, no error
            assert runner._initialize() is None
        else:
            # fails
            assert runner._initialize() == 1
def test_rdt_initialize(
        rdt_max_values_mock, cleanup_resctrl_mock,
        default_rdt_l3, default_rdt_mb,
        config_rdt_mb_control_enabled,
        config_rdt_cache_control_enabled,
        platform_rdt_mb_control_enabled,
        platform_rdt_cache_control_enabled,
        expected_error,
        expected_final_rdt_mb_control_enabled_with_value,
        expected_cleanup_arguments,
):
    allocation_configuration = AllocationConfiguration(
        default_rdt_mb=default_rdt_mb, default_rdt_l3=default_rdt_l3)
    runner = AllocationRunner(
        measurement_runner=MeasurementRunner(
            node=Mock(spec=Node),
            interval=1,
            rdt_enabled=True,
            metrics_storage=Mock(spec=Storage),
            allocation_configuration=allocation_configuration,
        ),
        allocator=Mock(spec=Allocator),
        anomalies_storage=Mock(spec=Storage),
        allocations_storage=Mock(spec=Storage),
        rdt_mb_control_required=config_rdt_mb_control_enabled,
        rdt_cache_control_required=config_rdt_cache_control_enabled)

    with patch('tests.testing.platform_mock.rdt_information', Mock(
            spec=RDTInformation,
            cbm_mask='fff', min_cbm_bits='2',
            mb_min_bandwidth=10, mb_bandwidth_gran=10,
            rdt_mb_control_enabled=platform_rdt_mb_control_enabled,
            rdt_cache_control_enabled=platform_rdt_cache_control_enabled)):
        assert runner._initialize_rdt() is not expected_error

    if expected_final_rdt_mb_control_enabled_with_value:
        assert runner._rdt_mb_control_required == expected_final_rdt_mb_control_enabled_with_value

    if expected_cleanup_arguments:
        cleanup_resctrl_mock.assert_called_with(*expected_cleanup_arguments)
    else:
        assert cleanup_resctrl_mock.call_count == 0
def test_measurements_runner(subcgroups):
    # Node mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    runner = MeasurementRunner(
        node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[t1, t2])),
        metrics_storage=Mock(spec=storage.Storage, store=Mock()),
        rdt_enabled=False,
        extra_labels=dict(extra_label='extra_value')  # extra label with some extra value
    )
    runner._wait = Mock()  # Mock to finish after one iteration.
    runner._initialize()
    runner._iterate()

    # Check output metrics.
    got_metrics = runner._metrics_storage.store.call_args[0][0]

    # Internal wca metrics are generated (wca is running, number of tasks under control,
    # memory usage and profiling information).
    assert_metric(got_metrics, 'wca_up', dict(extra_label='extra_value'))
    assert_metric(got_metrics, 'wca_tasks', expected_metric_value=2)
    # wca & its children memory usage (in bytes).
    assert_metric(got_metrics, 'wca_memory_usage_bytes',
                  expected_metric_value=WCA_MEMORY_USAGE * 2 * 1024)

    # Measurement metrics about tasks, based on get_measurements mocks.
    cpu_usage = TASK_CPU_USAGE * (len(subcgroups) if subcgroups else 1)
    assert_metric(got_metrics, 'cpu_usage', dict(task_id=t1.task_id),
                  expected_metric_value=cpu_usage)
    assert_metric(got_metrics, 'cpu_usage', dict(task_id=t2.task_id),
                  expected_metric_value=cpu_usage)

    # Test whether application and application_version_name were properly generated using
    # the default runner._task_label_generators defined in the constructor of MeasurementRunner.
    assert_metric(got_metrics, 'cpu_usage',
                  {'application': t1.name, 'application_version_name': ''})

    # Test whether the `initial_task_cpu_assignment` label is attached to task metrics.
    assert_metric(got_metrics, 'cpu_usage', {'initial_task_cpu_assignment': '8.0'})
def test_measurements_runner(subcgroups):
    # Node mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    runner = MeasurementRunner(
        node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[t1, t2])),
        metrics_storage=Mock(spec=storage.Storage, store=Mock()),
        rdt_enabled=False,
        gather_hw_mm_topology=False,
        extra_labels=dict(extra_label='extra_value'))
    runner._wait = Mock()  # Mock to finish after one iteration.
    runner._initialize()
    runner._iterate()

    # Check output metrics.
    got_metrics = runner._metrics_storage.store.call_args[0][0]

    # Internal wca metrics are generated (wca is running, number of tasks under control,
    # memory usage and profiling information).
    assert_metric(got_metrics, MetricName.WCA_UP, dict(extra_label='extra_value'))
    assert_metric(got_metrics, MetricName.WCA_TASKS, expected_metric_value=2)
    # wca & its children memory usage (in bytes).
    assert_metric(got_metrics, MetricName.WCA_MEM_USAGE_BYTES,
                  expected_metric_value=WCA_MEMORY_USAGE * 2 * 1024)

    # Measurement metrics about tasks, based on get_measurements mocks.
    cpu_usage = TASK_CPU_USAGE * (len(subcgroups) if subcgroups else 1)
    assert_metric(got_metrics, MetricName.TASK_CPU_USAGE_SECONDS, dict(task_id=t1.task_id),
                  expected_metric_value=cpu_usage)
    assert_metric(got_metrics, MetricName.TASK_CPU_USAGE_SECONDS, dict(task_id=t2.task_id),
                  expected_metric_value=cpu_usage)
def test_detection_runner(reset_counters_mock, subcgroups):
    # Tasks mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    # Detector mock - simulate returning one anomaly and an additional metric.
    detector_mock = Mock(
        spec=AnomalyDetector,
        detect=Mock(return_value=(
            [anomaly(t1.task_id, [t2.task_id],
                     metrics=[metric('contention_related_metric')])],  # one anomaly + related metric
            [metric('extra_metric_from_detector')]  # one extra metric
        )))

    runner = DetectionRunner(
        measurement_runner=MeasurementRunner(
            node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[t1, t2])),
            metrics_storage=Mock(spec=storage.Storage, store=Mock()),
            rdt_enabled=False,
            extra_labels=dict(extra_label='extra_value'),
        ),
        anomalies_storage=Mock(spec=storage.Storage, store=Mock()),
        detector=detector_mock)

    runner._measurement_runner._wait = Mock()
    runner._measurement_runner._initialize()

    # Mock to finish after one iteration.
    runner._measurement_runner._iterate()

    got_anomalies_metrics = runner._anomalies_storage.store.mock_calls[0][1][0]

    # Check anomaly-based metrics.
    assert_metric(got_anomalies_metrics, 'anomaly',
                  expected_metric_some_labels={
                      LABEL_WORKLOAD_INSTANCE: t1.labels[LABEL_WORKLOAD_INSTANCE],
                      LABEL_CONTENDED_TASK_ID: t1.task_id,
                      LABEL_CONTENDING_WORKLOAD_INSTANCE: t2.labels[LABEL_WORKLOAD_INSTANCE]
                  })
    assert_metric(got_anomalies_metrics, 'contention_related_metric',
                  expected_metric_some_labels=dict(extra_label='extra_value'))
    assert_metric(got_anomalies_metrics, 'extra_metric_from_detector')
    assert_metric(got_anomalies_metrics, 'anomaly_count', expected_metric_value=1)
    assert_metric(got_anomalies_metrics, 'anomaly_last_occurrence')

    # Check that detector was called with proper arguments.
    (platform, tasks_data) = detector_mock.detect.mock_calls[0][1]

    # Make sure that proper values are propagated to the detect method for t1.
    assert platform == platform_mock

    # Measurements have to match the get_measurements mock from the measurements_patch decorator.
    cpu_usage = TASK_CPU_USAGE * (len(subcgroups) if subcgroups else 1)
    assert_subdict(tasks_data[t1.task_id].measurements,
                   {MetricName.TASK_CPU_USAGE_SECONDS: cpu_usage})
    # Labels should have an extra LABEL_WORKLOAD_INSTANCE based on redis_task_with_default_labels
    # and a sanitized version of the other Mesos labels (without prefix).
    assert_subdict(tasks_data[t1.task_id].labels,
                   {LABEL_WORKLOAD_INSTANCE: 'redis_6792_t1',
                    'load_generator': 'rpc-perf-t1'})
    # Resources should match resources from redis_task_with_default_labels.
    assert_subdict(tasks_data[t1.task_id].resources, t1.resources)

    # Check any metrics for t2.
    assert_subdict(tasks_data[t2.task_id].measurements,
                   {MetricName.TASK_CPU_USAGE_SECONDS: cpu_usage})
def test_allocation_runner(_get_allocations_mock, _get_allocations_mock_, platform_mock,
                           reset_counter_mock, subcgroups):
    """ Low level system calls are not mocked - but higher level objects and functions:
        Cgroup, Resgroup, Platform, etc. Thus the test does not cover the full usage scenario
        (such tests would be much harder to write).
    """
    # Tasks mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    # Allocator mock (lower the quota and number of cache ways in dedicated group).
    # Patch some of the functions of AllocationRunner.
    runner = AllocationRunner(
        measurement_runner=MeasurementRunner(
            node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[])),
            metrics_storage=Mock(spec=storage.Storage, store=Mock()),
            rdt_enabled=True,
            gather_hw_mm_topology=False,
            extra_labels=dict(extra_labels='extra_value'),
        ),
        anomalies_storage=Mock(spec=storage.Storage, store=Mock()),
        allocations_storage=Mock(spec=storage.Storage, store=Mock()),
        rdt_mb_control_required=True,
        rdt_cache_control_required=True,
        allocator=Mock(spec=Allocator, allocate=Mock(return_value=({}, [], []))))

    runner._measurement_runner._wait = Mock()
    runner._measurement_runner._initialize()

    ############
    # First run (one task, one allocation).
    runner._measurement_runner._node.get_tasks.return_value = [t1]
    runner._allocator.allocate.return_value = (
        {t1.task_id: {
            AllocationType.QUOTA: .5,
            AllocationType.RDT: RDTAllocation(name=None, l3='L3:0=0000f')
        }}, [], [])

    runner._measurement_runner._iterate()

    # Check that allocator.allocate was called with proper arguments.
    assert runner._allocator.allocate.call_count == 1
    (_, tasks_data) = runner._allocator.allocate.mock_calls[0][1]
    assert_subdict(tasks_data[t1.task_id].allocations, _os_tasks_allocations)

    # Check allocation metrics ...
    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]
    # ... generic allocation metrics ...
    assert_metric(got_allocations_metrics, 'allocations_count',
                  dict(extra_labels='extra_value'), expected_metric_value=1)
    assert_metric(got_allocations_metrics, 'allocations_errors',
                  dict(extra_labels='extra_value'), expected_metric_value=0)
    assert_metric(got_allocations_metrics, 'allocation_duration',
                  dict(extra_labels='extra_value'))
    # ... and allocation metrics for task t1.
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id, extra_labels='extra_value'), 0.5)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id, extra_labels='extra_value'), 4)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_mask',
                  dict(task=t1.task_id, extra_labels='extra_value'), 15)

    ############################
    # Second run (two tasks, one allocation).
    runner._measurement_runner._node.get_tasks.return_value = [t1, t2]
    first_run_t1_task_allocations = {
        t1.task_id: {
            AllocationType.QUOTA: .5,
            AllocationType.RDT: RDTAllocation(name=None, l3='L3:0=0000f')
        }
    }
    runner._allocator.allocate.return_value = (first_run_t1_task_allocations, [], [])

    runner._measurement_runner._iterate()

    # Check allocation metrics ...
    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]
    # ... generic allocation metrics ...
    assert_metric(got_allocations_metrics, 'allocations_count', expected_metric_value=2)
    assert_metric(got_allocations_metrics, 'allocations_errors', expected_metric_value=0)
    assert_metric(got_allocations_metrics, 'allocation_duration')
    # ... and metrics for task t1 ...
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id), 0.5)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id), 4)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_mask',
                  dict(task=t1.task_id), 15)

    # Check allocate call.
    (_, tasks_data) = runner._allocator.allocate.mock_calls[1][1]
    # (note: tasks_allocations are always read from the filesystem)
    assert_subdict(tasks_data[t1.task_id].allocations, _os_tasks_allocations)
    assert_subdict(tasks_data[t2.task_id].allocations, _os_tasks_allocations)

    ############
    # Third run (two tasks, two allocations) - modify L3 cache and put both tasks
    # in the same group.
    runner._measurement_runner._node.get_tasks.return_value = [t1, t2]
    runner._allocator.allocate.return_value = (
        {
            t1.task_id: {
                AllocationType.QUOTA: 0.7,
                AllocationType.RDT: RDTAllocation(name='one_group', l3='L3:0=00fff')
            },
            t2.task_id: {
                AllocationType.QUOTA: 0.8,
                AllocationType.RDT: RDTAllocation(name='one_group', l3='L3:0=00fff')
            }
        }, [], [])

    runner._measurement_runner._iterate()

    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]

    assert_metric(got_allocations_metrics, 'allocations_count', expected_metric_value=4)
    # ... and metrics for tasks t1 and t2 ...
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id), 0.7)
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t2.task_id), 0.8)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id, group_name='one_group'), 12)  # 00fff=12
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t2.task_id, group_name='one_group'), 12)  # 00fff=12
def test_get_unknown_event(event_name, event_value, umask, config, config1, expected_output):
    assert MeasurementRunner._get_unknown_event(
        event_name, event_value, umask, config, config1) == expected_output
    assert event_name in METRICS_METADATA
def test_get_event_if_known(event, expected_output):
    assert MeasurementRunner._get_event_if_known(event) == expected_output
def test_parse_uncore_event_input_fail(event, exception):
    with pytest.raises(exception):
        MeasurementRunner._parse_uncore_event_input(event)
def test_parse_uncore_event_input(event, expected_output):
    assert MeasurementRunner._parse_uncore_event_input(event) == expected_output