def test_add_measurement(self):
    plot = SimplePlot(name='test_plot',
                      title='test_title',
                      x_axis='perf_throughput',
                      y_axis='perf_latency_p99')
    gpu_data = {0: {'gpu_used_memory': 5000, 'gpu_utilization': 50}}
    non_gpu_data = {'perf_throughput': 200, 'perf_latency_p99': 8000}
    objective_spec = {'perf_throughput': 10, 'perf_latency_p99': 5}
    measurement = construct_measurement(
        'test_model', gpu_data, non_gpu_data,
        ResultComparator(metric_objectives=objective_spec))

    # Add above measurement
    plot.add_measurement('test_model_label1', measurement=measurement)
    self.assertDictEqual(plot.data(),
                         {'test_model_label1': {
                             'x_data': [200],
                             'y_data': [8000]
                         }})

    # Add measurement again with a different label
    plot.add_measurement('test_model_label2', measurement=measurement)
    self.assertDictEqual(
        plot.data(), {
            'test_model_label1': {
                'x_data': [200],
                'y_data': [8000]
            },
            'test_model_label2': {
                'x_data': [200],
                'y_data': [8000]
            }
        })
def _test_summary_counts(self, add_table_fn, add_plot_fn, default_within_top,
                         top_n):
    '''
    Helper function to test creating summary reports and confirming that
    the number of entries added to plots and tables is as expected
    '''

    num_plots_in_summary_report = 2
    num_tables_in_summary_report = 1
    expected_config_count = top_n + 1 if not default_within_top else top_n
    expected_plot_count = num_plots_in_summary_report * expected_config_count
    expected_table_count = num_tables_in_summary_report * expected_config_count

    self._init_managers("test_model1", num_configs_per_model=top_n)
    result_comparator = ResultComparator(
        metric_objectives={"perf_throughput": 10})
    avg_gpu_metrics = {0: {"gpu_used_memory": 6000, "gpu_utilization": 60}}

    for i in range(10):
        p99 = 20 + i
        throughput = 100 - 10 * i if default_within_top else 100 + 10 * i
        avg_non_gpu_metrics = {
            "perf_throughput": throughput,
            "perf_latency_p99": p99,
            "cpu_used_ram": 1000
        }
        name = f"test_model1_config_{i}"
        if not i:
            name = "test_model1_config_default"
        self._add_result_measurement(name, "test_model1", avg_gpu_metrics,
                                     avg_non_gpu_metrics, result_comparator)

    self.result_manager.compile_and_sort_results()
    self.report_manager.create_summaries()

    self.assertEqual(expected_plot_count, add_plot_fn.call_count)
    self.assertEqual(expected_table_count, add_table_fn.call_count)
def _check_measurement_comparison(self, objective_spec, gpu_metric_values1,
                                  non_gpu_metric_values1, gpu_metric_values2,
                                  non_gpu_metric_values2, expected_result):
    """
    Helper function that takes all the data needed to construct two
    measurements, constructs and runs a result comparator on them, and
    checks the comparison against an expected result
    """

    result_comparator = ResultComparator(metric_objectives=objective_spec)
    measurement1 = construct_measurement(gpu_metric_values1,
                                         non_gpu_metric_values1,
                                         result_comparator)
    measurement2 = construct_measurement(gpu_metric_values2,
                                         non_gpu_metric_values2,
                                         result_comparator)
    self.assertEqual(
        result_comparator.compare_measurements(measurement1, measurement2),
        expected_result)
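# A minimal usage sketch (not from the source): a test method in this class
# could exercise the helper above like this. The metric values and the
# expectation that identical measurements compare as a tie (result 0) are
# assumptions for illustration only.
def test_compare_identical_measurements_example(self):
    objective_spec = {'perf_throughput': 10, 'perf_latency_p99': 5}
    gpu_metrics = {0: {'gpu_used_memory': 5000, 'gpu_utilization': 50}}
    non_gpu_metrics = {'perf_throughput': 200, 'perf_latency_p99': 8000}

    # Comparing a measurement against an identical one is assumed to be a tie
    self._check_measurement_comparison(objective_spec=objective_spec,
                                       gpu_metric_values1=gpu_metrics,
                                       non_gpu_metric_values1=non_gpu_metrics,
                                       gpu_metric_values2=gpu_metrics,
                                       non_gpu_metric_values2=non_gpu_metrics,
                                       expected_result=0)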
def _check_result_comparison(self,
                             objective_spec,
                             avg_gpu_metrics1,
                             avg_gpu_metrics2,
                             avg_non_gpu_metrics1,
                             avg_non_gpu_metrics2,
                             value_step1=1,
                             value_step2=1,
                             expected_result=0):
    """
    Helper function that takes all the data needed to construct two results,
    constructs and runs a result comparator on them, and checks that it
    produces the expected value.
    """

    result_comparator = ResultComparator(metric_objectives=objective_spec)
    result1 = construct_result(avg_gpu_metrics1, avg_non_gpu_metrics1,
                               result_comparator, value_step1)
    result2 = construct_result(avg_gpu_metrics2, avg_non_gpu_metrics2,
                               result_comparator, value_step2)
    self.assertEqual(result_comparator.compare_results(result1, result2),
                     expected_result)
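# A minimal usage sketch (not from the source): comparing two results built
# from the same metric values and the same value step, where the default
# expected_result=0 (a tie) is assumed to hold for identical inputs.
def test_compare_identical_results_example(self):
    objective_spec = {'perf_throughput': 10}
    avg_gpu_metrics = {0: {'gpu_used_memory': 6000, 'gpu_utilization': 60}}
    avg_non_gpu_metrics = {'perf_throughput': 100, 'perf_latency_p99': 4000}

    self._check_result_comparison(objective_spec,
                                  avg_gpu_metrics1=avg_gpu_metrics,
                                  avg_gpu_metrics2=avg_gpu_metrics,
                                  avg_non_gpu_metrics1=avg_non_gpu_metrics,
                                  avg_non_gpu_metrics2=avg_non_gpu_metrics,
                                  expected_result=0)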
def subtest_build_summary_table(self, mode, cpu_only):
    self._init_managers(mode=mode)
    result_comparator = ResultComparator(
        metric_objectives={"perf_throughput": 10})
    avg_gpu_metrics = {0: {"gpu_used_memory": 6000, "gpu_utilization": 60}}

    for i in range(10, 0, -1):
        avg_non_gpu_metrics = {
            "perf_throughput": 100 + 10 * i,
            "perf_latency_p99": 4000,
            "cpu_used_ram": 1000
        }
        self._add_result_measurement(f"test_model_config_{i}", "test_model",
                                     avg_gpu_metrics, avg_non_gpu_metrics,
                                     result_comparator, cpu_only)

    self.result_manager.compile_and_sort_results()
    self.report_manager.create_summaries()

    summary_table, summary_sentence = \
        self.report_manager._build_summary_table(report_key="test_model",
                                                 num_measurements=10,
                                                 gpu_name="TITAN RTX")

    expected_summary_sentence = (
        "In 10 measurement(s), config test_model_config_10 (1/GPU model instance(s)"
        " with max batch size of 8 and dynamic batching enabled) on"
        " platform tensorflow_graphdef delivers maximum"
        " throughput under the given constraints")
    if not cpu_only:
        expected_summary_sentence += " on GPU(s) TITAN RTX"
    expected_summary_sentence += "."
    self.assertEqual(expected_summary_sentence, summary_sentence)

    # Get throughput index and make sure results are sorted
    throughput_index = summary_table.headers().index("Throughput (infer/sec)")
    model_name_index = summary_table.headers().index("Model Config Name")
    for i in range(9):
        current_row = summary_table.get_row_by_index(i)
        next_row = summary_table.get_row_by_index(i + 1)
        self.assertEqual(current_row[model_name_index],
                         f"test_model_config_{10-i}")
        self.assertGreaterEqual(current_row[throughput_index],
                                next_row[throughput_index])
def test_build_summary_table(self):
    mock_model_config = MockModelConfig()
    mock_model_config.start()
    objective_spec = {'perf_throughput': 10}
    self.result_comparator = ResultComparator(
        metric_objectives=objective_spec)
    avg_gpu_metrics = {0: {'gpu_used_memory': 6000, 'gpu_utilization': 60}}

    for i in range(10, 0, -1):
        avg_non_gpu_metrics = {
            'perf_throughput': 100 + 10 * i,
            'perf_latency': 4000
        }
        self.model_config['name'] = f'model_{i}'
        model_config = ModelConfig.create_from_dictionary(self.model_config)
        self.report_manager.add_result(
            report_key='test_report',
            result=construct_result(
                avg_gpu_metric_values=avg_gpu_metrics,
                avg_non_gpu_metric_values=avg_non_gpu_metrics,
                comparator=self.result_comparator,
                model_config=model_config))

    summary_table, summary_sentence = \
        self.report_manager._build_summary_table(report_key='test_report',
                                                 num_measurements=10,
                                                 gpu_name='TITAN RTX')

    expected_summary_sentence = (
        "In 10 measurement(s), 1/GPU model instance(s)"
        " with max dynamic batch size of [4 8] on"
        " platform tensorflow_graphdef delivers maximum"
        " throughput under the given constraints on GPU(s) TITAN RTX.")
    self.assertEqual(expected_summary_sentence, summary_sentence)

    # Get throughput index and make sure results are sorted
    throughput_index = summary_table.headers().index('Throughput (infer/sec)')
    model_name_index = summary_table.headers().index('Model Config Name')
    for i in range(9):
        current_row = summary_table.get_row_by_index(i)
        next_row = summary_table.get_row_by_index(i + 1)
        self.assertEqual(current_row[model_name_index], f'model_{10-i}')
        self.assertGreaterEqual(current_row[throughput_index],
                                next_row[throughput_index])
def _subtest_build_detailed_info(self, cpu_only):
    self._init_managers(models="test_model_config_10", subcommand="report")
    result_comparator = ResultComparator(
        metric_objectives={"perf_throughput": 10})
    avg_gpu_metrics = {
        "gpu_uuid": {
            "gpu_used_memory": 6000,
            "gpu_utilization": 60
        }
    }

    for i in range(10, 0, -1):
        avg_non_gpu_metrics = {
            "perf_throughput": 100 + 10 * i,
            "perf_latency_p99": 4000,
            "cpu_used_ram": 1000
        }
        self._add_result_measurement(f"test_model_config_{i}", "test_model",
                                     avg_gpu_metrics, avg_non_gpu_metrics,
                                     result_comparator, cpu_only=cpu_only)

    self.report_manager._add_detailed_report_data()
    self.report_manager._build_detailed_table("test_model_config_10")
    sentence = self.report_manager._build_detailed_info(
        "test_model_config_10")

    if cpu_only:
        expected_sentence = (
            "The model config \"test_model_config_10\" uses 1 GPU instance(s) with "
            "a max batch size of 8 and has dynamic batching enabled. 1 measurement(s) "
            "were obtained for the model config on CPU. "
            "This model uses the platform tensorflow_graphdef.")
    else:
        expected_sentence = (
            "The model config \"test_model_config_10\" uses 1 GPU instance(s) with "
            "a max batch size of 8 and has dynamic batching enabled. 1 measurement(s) "
            "were obtained for the model config on GPU(s) fake_gpu_name with memory limit(s) 1.0 GB. "
            "This model uses the platform tensorflow_graphdef.")

    self.assertEqual(expected_sentence, sentence)
def test_add_results(self):
    for mode in ['online', 'offline']:
        self._init_managers("test_model1,test_model2", mode=mode)
        result_comparator = ResultComparator(
            metric_objectives={"perf_throughput": 10})
        avg_gpu_metrics = {
            0: {
                "gpu_used_memory": 6000,
                "gpu_utilization": 60
            }
        }

        for i in range(10):
            avg_non_gpu_metrics = {
                "perf_throughput": 100 + 10 * i,
                "perf_latency_p99": 4000,
                "cpu_used_ram": 1000
            }
            self._add_result_measurement(f"test_model1_report_{i}",
                                         "test_model1", avg_gpu_metrics,
                                         avg_non_gpu_metrics,
                                         result_comparator)

        for i in range(5):
            avg_non_gpu_metrics = {
                "perf_throughput": 200 + 10 * i,
                "perf_latency_p99": 4000,
                "cpu_used_ram": 1000
            }
            self._add_result_measurement(f"test_model2_report_{i}",
                                         "test_model2", avg_gpu_metrics,
                                         avg_non_gpu_metrics,
                                         result_comparator)

        self.result_manager.compile_and_sort_results()
        self.report_manager.create_summaries()
        self.assertEqual(self.report_manager.report_keys(),
                         ["test_model1", "test_model2"])

        report1_data = self.report_manager.data("test_model1")
        report2_data = self.report_manager.data("test_model2")

        self.assertEqual(len(report1_data), 10)
        self.assertEqual(len(report2_data), 5)
def test_plot_data(self):
    plot = SimplePlot(name='test_plot',
                      title='test_title',
                      x_axis='perf_throughput',
                      y_axis='perf_latency_p99')
    gpu_data = {0: {'gpu_used_memory': 5000, 'gpu_utilization': 50}}
    non_gpu_data = {'perf_throughput': 200, 'perf_latency_p99': 8000}
    objective_spec = {'perf_throughput': 10, 'perf_latency_p99': 5}
    measurement = construct_measurement(
        'test_model', gpu_data, non_gpu_data,
        ResultComparator(metric_objectives=objective_spec))
    plot.add_measurement('test_model_label', measurement=measurement)

    # Call plot and assert args
    plot.plot_data_and_constraints(constraints={})
    self.matplotlib_mock.assert_called_plot_with_args(
        x_data=[200], y_data=[8000], marker='o', label='test_model_label')
def configure_result_manager(self, config_model):
    """
    Processes the constraints and objectives for the given ConfigModel
    and creates a result comparator to pass to the result manager

    Parameters
    ----------
    config_model : ConfigModel
        The config model object for the model that is currently being run
    """

    constraints = {}

    # Construct dict of record types for objectives and constraints
    objective_tags = list(config_model.objectives().keys())
    objective_metrics = MetricsManager.get_metric_types(tags=objective_tags)
    objectives = {
        objective_metrics[i]: config_model.objectives()[objective_tags[i]]
        for i in range(len(objective_tags))
    }

    # Constraints may be empty
    if config_model.constraints():
        constraint_tags = list(config_model.constraints().keys())
        constraint_metrics = MetricsManager.get_metric_types(
            tags=constraint_tags)
        constraints = {
            constraint_metrics[i]:
            config_model.constraints()[constraint_tags[i]]
            for i in range(len(constraint_tags))
        }

    self._result_comparator = ResultComparator(
        gpu_metric_types=self._dcgm_metrics,
        non_gpu_metric_types=self._perf_metrics + self._cpu_metrics,
        metric_objectives=objectives)

    self._result_manager.set_constraints_and_comparator(
        constraints=constraints, comparator=self._result_comparator)
def _construct_result_comparator(self, gpu_metric_tags, non_gpu_metric_tags,
                                 objective_spec):
    """
    Constructs a result comparator from the given objective spec dictionary
    """

    gpu_metric_types = MetricsManager.get_metric_types(gpu_metric_tags)
    non_gpu_metric_types = MetricsManager.get_metric_types(
        non_gpu_metric_tags)

    objective_tags = list(objective_spec.keys())
    objective_metrics = MetricsManager.get_metric_types(objective_tags)
    objectives = {
        objective_metrics[i]: objective_spec[objective_tags[i]]
        for i in range(len(objective_tags))
    }

    return ResultComparator(gpu_metric_types=gpu_metric_types,
                            non_gpu_metric_types=non_gpu_metric_types,
                            metric_objectives=objectives)
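# A minimal usage sketch (not from the source): inside a test, the helper
# above could build a comparator from tag lists and an objective spec keyed
# by metric tag. The specific tags shown here are assumptions based on the
# metrics used elsewhere in these tests.
def _example_construct_comparator_usage(self):
    return self._construct_result_comparator(
        gpu_metric_tags=['gpu_used_memory', 'gpu_utilization'],
        non_gpu_metric_tags=['perf_throughput', 'perf_latency'],
        objective_spec={'perf_throughput': 10, 'perf_latency': 5})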
def test_add_results(self):
    objective_spec = {'perf_throughput': 10}
    self.result_comparator = ResultComparator(
        metric_objectives=objective_spec)
    avg_gpu_metrics = {0: {'gpu_used_memory': 6000, 'gpu_utilization': 60}}

    for i in range(10):
        avg_non_gpu_metrics = {
            'perf_throughput': 100 + 10 * i,
            'perf_latency': 4000
        }
        self.report_manager.add_result(
            report_key='test_report1',
            result=construct_result(
                avg_gpu_metric_values=avg_gpu_metrics,
                avg_non_gpu_metric_values=avg_non_gpu_metrics,
                comparator=self.result_comparator))

    for i in range(5):
        avg_non_gpu_metrics = {
            'perf_throughput': 200 + 10 * i,
            'perf_latency': 4000
        }
        self.report_manager.add_result(
            report_key='test_report2',
            result=construct_result(
                avg_gpu_metric_values=avg_gpu_metrics,
                avg_non_gpu_metric_values=avg_non_gpu_metrics,
                comparator=self.result_comparator))

    self.assertEqual(self.report_manager.report_keys(),
                     ['test_report1', 'test_report2'])

    report1_data = self.report_manager.data('test_report1')
    report2_data = self.report_manager.data('test_report2')

    self.assertEqual(len(report1_data), 10)
    self.assertEqual(len(report2_data), 5)
def setUp(self):
    objective_spec = {'perf_throughput': 10, 'perf_latency': 5}
    self.result_heap = ResultHeap()
    self.result_comparator = ResultComparator(
        metric_objectives=objective_spec)