Example #1
    def test_cpu_stats(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        gpus = ['all']

        # Test local server cpu_stats
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        self.server.start()
        _, _ = self.server.cpu_stats()
        self.server_local_mock.assert_cpu_stats_called()
        self.server.stop()

        # Test docker server cpu stats
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)
        self.server.start()

        # The following needs to be called, as it resets the exec_run return value
        self.server_docker_mock.assert_server_process_start_called_with(
            TRITON_DOCKER_BIN_PATH + ' ' + server_config.to_cli_string(),
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, 8000, 8001, 8002)
        _, _ = self.server.cpu_stats()
        self.server_docker_mock.assert_cpu_stats_called()
        self.server.stop()

    def test_server_config(self):

        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Check config initializations
        self.assertIsNone(server_config[CONFIG_TEST_ARG],
                          msg="Server config had unexpected initial "
                          f"value for {CONFIG_TEST_ARG}")
        # Set value
        server_config[CONFIG_TEST_ARG] = True

        # Test get again
        self.assertTrue(server_config[CONFIG_TEST_ARG],
                        msg=f"{CONFIG_TEST_ARG} was not set")

        # Try to set an unsupported config argument, expect failure
        with self.assertRaises(TritonModelAnalyzerException,
                               msg="Expected exception on trying to set "
                               "unsupported argument in Triton server "
                               "config"):
            server_config['dummy'] = 1

        # Reset test arg
        server_config[CONFIG_TEST_ARG] = None

        # Finally set a couple of args and then check the cli string
        for arg, value in CLI_TO_STRING_TEST_ARGS.items():
            server_config[arg] = value

        cli_string = server_config.to_cli_string()
        for argstring in cli_string.split():

            # Parse each '--arg=value' token back into its arg and value
            arg, value = argstring.split('=')
            arg = arg[2:]

            # Make sure each parsed arg was in test dict
            self.assertIn(arg,
                          CLI_TO_STRING_TEST_ARGS,
                          msg=f"CLI string contained unknown argument: {arg}")

            # Make sure parsed value is the one from dict, check type too
            test_value = CLI_TO_STRING_TEST_ARGS[arg]
            self.assertEqual(
                test_value,
                type(test_value)(value),
                msg=f"CLI string contained unknown value: {value}")
Example #3
    def test_triton_server_ssl_options(self):
        server_config = TritonServerConfig()

        triton_server_flags = {
            'grpc-use-ssl': '1',
            'grpc-use-ssl-mutual': '1',
            'grpc-server-cert': 'a',
            'grpc-server-key': 'b',
            'grpc-root-cert': 'c',
        }
        server_config.update_config(triton_server_flags)

        expected_cli_str = f"--grpc-use-ssl=1 --grpc-use-ssl-mutual=1 "\
            f"--grpc-server-cert=a --grpc-server-key=b --grpc-root-cert=c"
        self.assertEqual(server_config.to_cli_string(), expected_cli_str)
Example #4
    def _test_create_server(self, gpus):
        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Run for both types of environments
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)

        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

        # Try to create a server without specifying a model repository and
        # expect an error
        server_config['model-repository'] = None
        with self.assertRaises(
                AssertionError,
                msg="Expected AssertionError for trying to create "
                "server without specifying model repository."):
            self.server = TritonServerFactory.create_server_docker(
                image=TRITON_IMAGE, config=server_config, gpus=gpus)
        with self.assertRaises(
                AssertionError,
                msg="Expected AssertionError for trying to create "
                "server without specifying model repository."):
            self.server = TritonServerFactory.create_server_local(
                path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
Example #5
    def start_stop_docker_args(self):
        device_requests, gpu_uuids = self._find_correct_gpu_settings(
            self._sys_gpus)

        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create bind mounts ('<host_path>:<container_path>:<mode>') and labels
        mounts = [
            '/host/path:/dest/path:ro', '/another/host/path:/some/dest/path:rw'
        ]
        labels = {'RUNNER_ID': 'TEST_RUNNER_ID'}

        environment = {'VARIABLE': 'VALUE'}
        # Create server in docker, start, wait, and stop
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE,
            config=server_config,
            gpus=self._sys_gpus,
            mounts=mounts,
            labels=labels)

        # Start the server and check that the mocked API is called
        self.server.start(env=environment)
        self.server_docker_mock.assert_server_process_start_called_with(
            f"{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}",
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpu_uuids,
            8000, 8001, 8002, mounts, labels)

        # Stop container and check api calls
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()

    def test_monitor_disable(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        gpus = [
            GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
        ]

        frequency = 1
        monitoring_time = 2
        metrics = []

        server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

        # Start triton and monitor
        server.start()
        cpu_monitor = CPUMonitor(server, frequency, metrics)
        cpu_monitor.start_recording_metrics()
        time.sleep(monitoring_time)
        records = cpu_monitor.stop_recording_metrics()

        # Assert no library calls
        self.server_local_mock.assert_cpu_stats_not_called()

        cpu_monitor.destroy()
        server.stop()
Example #7
    def test_measurement_interval_increase(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer_config = PerfAnalyzerConfig()
        perf_analyzer_config['model-name'] = TEST_MODEL_NAME
        perf_analyzer_config['concurrency-range'] = TEST_CONCURRENCY_RANGE
        perf_analyzer_config['measurement-mode'] = 'time_windows'
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()

        # Test failure to stabilize for measurement windows
        self.client.wait_for_server_ready(num_retries=1)
        test_stabilize_output = "Please use a larger time window"
        self.perf_mock.set_perf_analyzer_result_string(test_stabilize_output)
        self.perf_mock.set_perf_analyzer_return_code(1)
        perf_metrics = [PerfThroughput, PerfLatencyP99]
        perf_analyzer.run(perf_metrics)
        self.assertEqual(
            self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
Example #8
    def test_measurement_request_count_increase(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()

        # Test the timeout for count mode
        self.client.wait_for_server_ready(num_retries=1)
        test_both_output = "Please use a larger time window"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        self.perf_mock.set_perf_analyzer_return_code(1)
        perf_metrics = [PerfThroughput, PerfLatencyP99]
        perf_analyzer.run(perf_metrics)
        self.assertEqual(
            self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
Example #9
    def _test_cpu_stats(self, gpus):
        device_requests = [device.device_id() for device in gpus]

        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Test local server cpu_stats
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
        self.server.start()
        _, _ = self.server.cpu_stats()
        self.server_local_mock.assert_cpu_stats_called()
        self.server.stop()

        # Test docker server cpu stats
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)
        self.server.start()

        # The following needs to be called, as it resets the exec_run return value
        self.server_docker_mock.assert_server_process_start_called_with(
            f'{TRITON_DOCKER_BIN_PATH} {server_config.to_cli_string()}',
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, device_requests, gpus, 8000,
            8001, 8002)
        _, _ = self.server.cpu_stats()
        self.server_docker_mock.assert_cpu_stats_called()
        self.server.stop()
Example #10
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        # Run perf analyzer with dummy metrics to check command parsing
        perf_metrics = [id]
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_analyzer.run(perf_metrics)
        self.perf_mock.assert_perf_analyzer_run_as(
            [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])

        # Test latency parsing
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_metrics = [PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5000)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        perf_metrics = [PerfThroughput]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\nAvg latency: 3.6 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        perf_metrics = [PerfThroughput, PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].value(), 0.001)
        self.assertEqual(records[1].value(), 3.6)

        # Test exception handling
        with self.assertRaisesRegex(
                expected_exception=TritonModelAnalyzerException,
                expected_regex="Running perf_analyzer with",
                msg="Expected TritonModelAnalyzerException"):
            self.perf_mock.raise_exception_on_run()
            perf_analyzer.run(perf_metrics)

        self.server.stop()
Example #11
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        # Run perf analyzer with dummy metrics to check command parsing
        perf_metrics = [id]
        test_latency_output = "p99 latency: 5000 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_analyzer.run(perf_metrics)
        self.perf_mock.assert_perf_analyzer_run_as([
            PERF_BIN_PATH, '-m', TEST_MODEL_NAME, '--measurement-interval',
            str(self.config['measurement-interval'])
        ])

        # Test latency parsing
        test_latency_output = "p99 latency: 5000 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_metrics = [PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        perf_metrics = [PerfThroughput]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\np99 latency: 3600 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        perf_metrics = [PerfThroughput, PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].value(), 0.001)
        self.assertEqual(records[1].value(), 3.6)

        # Test exception handling
        self.perf_mock.set_perf_analyzer_return_code(1)
        self.assertTrue(perf_analyzer.run(perf_metrics))
        self.server.stop()

    def test_start_wait_stop_gpus(self):
        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Create server in docker, start, wait, and stop
        self.server = TritonServerFactory.create_server_docker(
            model_path=MODEL_LOCAL_PATH,
            image=TRITON_IMAGE,
            config=server_config)

        # Set mock status_code to error, and generate exception
        self._mock_server_wait_for_ready(assert_raises=True)

        # Start the server and check that the mocked API is called
        self.server.start()
        self.server_docker_mock.assert_server_process_start_called_with(
            TRITON_DOCKER_BIN_PATH + ' ' + server_config.to_cli_string(),
            MODEL_LOCAL_PATH, MODEL_REPOSITORY_PATH, TRITON_IMAGE, 8000, 8001,
            8002)

        # Mock status code for connected server then stop
        self._mock_server_wait_for_ready(assert_raises=False)

        # Stop container and check api calls
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()

        # Create local server which runs triton as a subprocess
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)

        self._mock_server_wait_for_ready(assert_raises=True)

        # Check that API functions are called
        self.server.start()

        self.server_local_mock.assert_server_process_start_called_with(cmd=[
            TRITON_LOCAL_BIN_PATH, '--model-repository', MODEL_REPOSITORY_PATH
        ])

        self._mock_server_wait_for_ready(assert_raises=False)
        self.server.stop()
        self.server_local_mock.assert_server_process_terminate_called()

    def test_start_stop_gpus(self):
        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        gpus = ['all']

        # Create server in docker, start, wait, and stop
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)

        # Start the server and check that the mocked API is called
        self.server.start()
        self.server_docker_mock.assert_server_process_start_called_with(
            TRITON_DOCKER_BIN_PATH + ' ' + server_config.to_cli_string(),
            MODEL_REPOSITORY_PATH, TRITON_IMAGE, 8000, 8001, 8002)

        self.server_docker_mock.raise_exception_on_container_run()
        with self.assertRaises(TritonModelAnalyzerException):
            self.server.start()
        self.server_docker_mock.stop_raise_exception_on_container_run()

        # Stop container and check api calls
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()

        # Create local server which runs triton as a subprocess
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)

        # Check that API functions are called
        self.server.start()

        self.server_local_mock.assert_server_process_start_called_with(cmd=[
            TRITON_LOCAL_BIN_PATH, '--model-repository', MODEL_REPOSITORY_PATH
        ])

        self.server.stop()
        self.server_local_mock.assert_server_process_terminate_called()
Example #14
    def setUp(self):
        # Mocks
        self.mock_server_docker = MockServerDockerMethods()
        self.tritonclient_mock = MockTritonClientMethods()

        # Create server config
        self.server_config = TritonServerConfig()
        self.server_config['model-repository'] = MODEL_REPOSITORY_PATH
        self.server_config['model-control-mode'] = 'explicit'

        # Set CUDA_VISIBLE_DEVICES
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Create and start the server
        self.server = TritonServerFactory.create_server_docker(
            model_path=MODEL_LOCAL_PATH,
            image=TRITON_IMAGE,
            config=self.server_config)
Example #15
    def _test_get_logs(self, gpus):
        # Create a TritonServerConfig
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Check docker server logs
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=server_config, gpus=gpus)
        self.server.start()
        self.server.stop()
        self.server_docker_mock.assert_server_process_terminate_called()
        self.assertEqual(self.server.logs(), "Triton Server Test Log")

        # Check local server logs
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)
        self.server.start()
        self.server.stop()
        self.server_local_mock.assert_server_process_terminate_called()
        self.assertEqual(self.server.logs(), "Triton Server Test Log")

    def test_run(self, requests_mock):
        # Now create a server config
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_client = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)

        self.server.start()
        requests_mock.get.return_value.status_code = 200
        self.server.wait_for_ready(num_retries=1)

        # Run perf analyzer
        throughput_record, latency_record = perf_client.run()
        self.perf_mock.assert_perf_analyzer_run_as(
            [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])
        self.server.stop()

        # Test latency parsing
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        _, latency_record = perf_client.run()
        self.assertEqual(latency_record.value(), 5000)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        throughput_record, _ = perf_client.run()
        self.assertEqual(throughput_record.value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 ms\nAvg latency: 3.6 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        throughput_record, latency_record = perf_client.run()
        self.assertEqual(throughput_record.value(), 0.001)
        self.assertEqual(latency_record.value(), 3.6)

    def test_record_cpu_memory(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH
        gpus = [
            GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
        ]

        frequency = 1
        monitoring_time = 2
        metrics = [CPUAvailableRAM, CPUUsedRAM]

        server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=gpus)

        # Start triton and monitor
        server.start()
        cpu_monitor = CPUMonitor(server, frequency, metrics)
        cpu_monitor.start_recording_metrics()
        time.sleep(monitoring_time)
        records = cpu_monitor.stop_recording_metrics()

        # Assert library calls
        self.server_local_mock.assert_cpu_stats_called()

        # Assert instance types
        for record in records:
            self.assertIsInstance(record.value(), float)
            self.assertIsInstance(record.timestamp(), int)

        # The number of records should be divisible by the number of metrics
        self.assertTrue(len(records) % len(metrics) == 0)
        self.assertTrue(len(records) > 0)

        with self.assertRaises(TritonModelAnalyzerException):
            cpu_monitor.stop_recording_metrics()

        cpu_monitor.destroy()
        server.stop()

    def setUp(self):

        # GPUs
        gpus = [
            GPUDevice('TEST_DEVICE_NAME', 0, "TEST_PCI_BUS_ID", "TEST_UUID")
        ]

        # Mocks
        self.server_docker_mock = MockServerDockerMethods()
        self.tritonclient_mock = MockTritonClientMethods()
        self.server_docker_mock.start()
        self.tritonclient_mock.start()

        # Create server config
        self.server_config = TritonServerConfig()
        self.server_config['model-repository'] = MODEL_REPOSITORY_PATH
        self.server_config['model-control-mode'] = 'explicit'

        # Set CUDA_VISIBLE_DEVICES
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

        # Create and start the server
        self.server = TritonServerFactory.create_server_docker(
            image=TRITON_IMAGE, config=self.server_config, gpus=gpus)
Example #19
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait\n1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314"""

        # Test avg latency parsing
        perf_metrics = [PerfLatencyAvg]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5)

        # Test p90 latency parsing
        perf_metrics = [PerfLatencyP90]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.7)

        # Test p95 latency parsing
        perf_metrics = [PerfLatencyP95]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.8)

        # Test p99 latency parsing
        perf_metrics = [PerfLatencyP99]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.9)

        # Test throughput parsing
        perf_metrics = [PerfThroughput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test client response wait
        perf_metrics = [PerfClientResponseWait]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.314)

        # Test server queue
        perf_metrics = [PerfServerQueue]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.018)

        # Test server compute infer
        perf_metrics = [PerfServerComputeInfer]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.065)

        # Test server compute input
        perf_metrics = [PerfServerComputeInput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.034)

        # Test server compute output
        perf_metrics = [PerfServerComputeOutput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.016)

        # Test parsing for a subset of metrics
        perf_metrics = [
            PerfThroughput, PerfLatencyAvg, PerfLatencyP90, PerfLatencyP95,
            PerfLatencyP99
        ]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 5)

        # Test no exceptions are raised when nothing can be parsed
        pa_csv_empty = ""
        perf_metrics = [
            PerfThroughput, PerfClientSendRecv, PerfClientResponseWait,
            PerfServerQueue, PerfServerComputeInfer, PerfServerComputeInput,
            PerfServerComputeOutput
        ]
        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_empty)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            self.assertFalse(perf_analyzer.run(perf_metrics))

        # Test exception handling
        self.perf_mock.set_perf_analyzer_return_code(1)
        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            self.assertTrue(perf_analyzer.run(perf_metrics))
        self.server.stop()
Example #20
    def _add_profile_models_configs(self):
        """
        Adds configs specific to model specifications
        """
        triton_server_flags_scheme = ConfigObject(schema={
            k: ConfigPrimitive(str) for k in TritonServerConfig.allowed_keys()
        })
        perf_analyzer_additive_keys = {
            k: None for k in PerfAnalyzerConfig.additive_keys()
        }
        perf_analyzer_flags_scheme = ConfigObject(
            schema={
                k:
                ((ConfigUnion([ConfigPrimitive(
                    type_=str), ConfigListString()])) if (
                        k in perf_analyzer_additive_keys) else ConfigPrimitive(
                            type_=str))
                for k in PerfAnalyzerConfig.allowed_keys()
            })
        triton_server_environment_scheme = ConfigObject(
            schema={'*': ConfigPrimitive(str)})
        self._add_config(
            ConfigField(
                'perf_analyzer_flags',
                field_type=perf_analyzer_flags_scheme,
                description=
                'Allows custom configuration of the perf analyzer instances used by model analyzer.'
            ))
        self._add_config(
            ConfigField(
                'triton_server_flags',
                field_type=triton_server_flags_scheme,
                description=
                'Allows custom configuration of the triton instances used by model analyzer.'
            ))
        self._add_config(
            ConfigField(
                'triton_server_environment',
                field_type=triton_server_environment_scheme,
                description=
                'Allows setting environment variables for tritonserver instances launched by Model Analyzer.'
            ))

        objectives_scheme = ConfigUnion([
            ConfigObject(
                schema={
                    tag: ConfigPrimitive(type_=int)
                    for tag in RecordType.get_all_record_types().keys()
                }),
            ConfigListString(output_mapper=objective_list_output_mapper)
        ])
        constraints_scheme = ConfigObject(
            schema={
                'perf_throughput':
                    ConfigObject(schema={
                        'min': ConfigPrimitive(int),
                    }),
                'perf_latency_avg':
                    ConfigObject(schema={
                        'max': ConfigPrimitive(int),
                    }),
                'perf_latency_p90':
                    ConfigObject(schema={
                        'max': ConfigPrimitive(int),
                    }),
                'perf_latency_p95':
                    ConfigObject(schema={
                        'max': ConfigPrimitive(int),
                    }),
                'perf_latency_p99':
                    ConfigObject(schema={
                        'max': ConfigPrimitive(int),
                    }),
                'perf_latency':
                    ConfigObject(schema={
                        'max': ConfigPrimitive(int),
                    }),
                'gpu_used_memory':
                    ConfigObject(schema={
                        'max': ConfigPrimitive(int),
                    }),
            })
        self._add_config(
            ConfigField(
                'objectives',
                field_type=objectives_scheme,
                default_value=DEFAULT_OFFLINE_OBJECTIVES,
                description=
                'Model Analyzer uses the objectives described here to find the best configuration for each model.'
            ))
        self._add_config(
            ConfigField(
                'constraints',
                field_type=constraints_scheme,
                description=
                'Constraints on the objectives specified in the "objectives" field of the config.'
            ))
        model_config_fields = self._get_model_config_fields()
        profile_model_scheme = ConfigObject(
            required=True,
            schema={
                # Any key is allowed, but the keys must follow the pattern
                # below
                '*':
                    ConfigObject(
                        schema={
                            'cpu_only':
                                ConfigPrimitive(bool),
                            'parameters':
                                ConfigObject(
                                    schema={
                                        'batch_sizes':
                                            ConfigListNumeric(type_=int),
                                        'concurrency':
                                            ConfigListNumeric(type_=int)
                                    }),
                            'objectives':
                                objectives_scheme,
                            'constraints':
                                constraints_scheme,
                            'model_config_parameters':
                                model_config_fields,
                            'perf_analyzer_flags':
                                perf_analyzer_flags_scheme,
                            'triton_server_flags':
                                triton_server_flags_scheme,
                            'triton_server_environment':
                                triton_server_environment_scheme
                        })
            },
            output_mapper=ConfigModelProfileSpec.
            model_object_to_config_model_profile_spec)
        self._add_config(
            ConfigField(
                'profile_models',
                flags=['--profile-models'],
                field_type=ConfigUnion([
                    profile_model_scheme,
                    ConfigListGeneric(ConfigUnion([
                        profile_model_scheme,
                        ConfigPrimitive(str,
                                        output_mapper=ConfigModelProfileSpec.
                                        model_str_to_config_model_profile_spec)
                    ]),
                                      required=True,
                                      output_mapper=ConfigModelProfileSpec.
                                      model_mixed_to_config_model_profile_spec),
                    ConfigListString(output_mapper=ConfigModelProfileSpec.
                                     model_list_to_config_model_profile_spec),
                ],
                                       required=True),
                description='List of the models to be profiled'))
        self._add_config(
            ConfigField(
                'batch_sizes',
                flags=['-b', '--batch-sizes'],
                field_type=ConfigListNumeric(int),
                default_value=DEFAULT_BATCH_SIZES,
                description=
                'Comma-delimited list of batch sizes to use for the profiling'))
        self._add_config(
            ConfigField(
                'concurrency',
                flags=['-c', '--concurrency'],
                field_type=ConfigListNumeric(int),
                description=
                "Comma-delimited list of concurrency values or ranges <start:end:step>"
                " to be used during profiling"))
        self._add_config(
            ConfigField(
                'reload_model_disable',
                field_type=ConfigPrimitive(bool),
                parser_args={'action': 'store_true'},
                default_value=False,
                flags=['--reload-model-disable'],
                description='Flag to indicate whether or not to disable model '
                'loading and unloading in remote mode.'))
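
For orientation, here is a hedged sketch of a profile_models value that the schema constructed above appears to accept, written as a plain Python dict mirroring the YAML these fields are typically parsed from. The model name and every number are placeholders, not taken from any real configuration.

# Hypothetical profile_models value matching the schema built in
# _add_profile_models_configs(); the keys follow the ConfigObject definitions
# above, while the model name and all numbers are placeholders.
profile_models = {
    'my_model': {
        'cpu_only': False,
        'parameters': {
            'batch_sizes': [1, 4, 8],
            'concurrency': [2, 4, 8],
        },
        'objectives': ['perf_throughput'],
        'constraints': {
            'perf_latency_p99': {'max': 100},
        },
    }
}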