Example #1
    def test_measurement_request_count_increase(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()

        # Test that count mode retries up to max_retries when measurement fails
        self.client.wait_for_server_ready(num_retries=1)
        test_both_output = "Please use a larger time window"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        self.perf_mock.set_perf_analyzer_return_code(1)
        perf_metrics = [PerfThroughput, PerfLatencyP99]
        perf_analyzer.run(perf_metrics)
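        # With max_retries=10 and the mocked perf_analyzer always failing with
        # the "larger time window" message, the analyzer is expected to re-run
        # the binary once per retry, so the mock's stdout should be read
        # exactly max_retries (10) times.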
        self.assertEqual(
            self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
Example #2
    def test_measurement_interval_increase(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer_config = PerfAnalyzerConfig()
        perf_analyzer_config['model-name'] = TEST_MODEL_NAME
        perf_analyzer_config['concurrency-range'] = TEST_CONCURRENCY_RANGE
        perf_analyzer_config['measurement-mode'] = 'time_windows'
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=perf_analyzer_config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()

        # Test failure to stabilize for measurement windows
        self.client.wait_for_server_ready(num_retries=1)
        test_stabilize_output = "Please use a larger time window"
        self.perf_mock.set_perf_analyzer_result_string(test_stabilize_output)
        self.perf_mock.set_perf_analyzer_return_code(1)
        perf_metrics = [PerfThroughput, PerfLatencyP99]
        perf_analyzer.run(perf_metrics)
        self.assertEqual(
            self.perf_mock.get_perf_analyzer_popen_read_call_count(), 10)
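Example #3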
    def _get_perf_analyzer_metrics(self, perf_config, perf_output_writer=None):
        """
        Gets the aggregated metrics from the perf_analyzer

        Parameters
        ----------
        perf_config : dict
            The keys are arguments to perf_analyzer; the values are their
            values
        perf_output_writer : OutputWriter
            Writer that writes the output from perf_analyzer to the output
            stream/file. If None, the output is not written

        Raises
        ------
        TritonModelAnalyzerException
        """

        try:
            perf_analyzer = PerfAnalyzer(
                path=self._perf_analyzer_path,
                config=perf_config,
                timeout=self._config.perf_analyzer_timeout,
                max_cpu_util=self._config.perf_analyzer_cpu_util)
            status = perf_analyzer.run(self._perf_metrics)
            # PerfAnalyzer run was not successful
            if status == 1:
                return 1
        except FileNotFoundError as e:
            raise TritonModelAnalyzerException(
                f"perf_analyzer binary not found : {e}")

        if perf_output_writer:
            perf_output_writer.write(perf_analyzer.output() + '\n')

        perf_records = perf_analyzer.get_records()
        perf_record_aggregator = RecordAggregator()
        perf_record_aggregator.insert_all(perf_records)

        return perf_record_aggregator.aggregate()
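    # Usage sketch (illustrative only; the config keys and values below are
    # assumptions, not taken from the original source):
    #
    #     perf_config = PerfAnalyzerConfig()
    #     perf_config['model-name'] = 'my_model'   # hypothetical model name
    #     result = self._get_perf_analyzer_metrics(perf_config,
    #                                              perf_output_writer=None)
    #     if result == 1:
    #         ...  # perf_analyzer run failed; nothing was aggregated
    #     else:
    #         aggregated_records = result  # output of RecordAggregator.aggregate()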
Example #4
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        # Run perf analyzer with dummy tags to check command parsing
        perf_tags = [id]
        _ = perf_analyzer.run(perf_tags)
        self.perf_mock.assert_perf_analyzer_run_as(
            [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])

        # Test latency parsing
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_tags = [PerfLatency]
        records = perf_analyzer.run(perf_tags)
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5000)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        perf_tags = [PerfThroughput]
        records = perf_analyzer.run(perf_tags)
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\nAvg latency: 3.6 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        perf_tags = [PerfLatency, PerfThroughput]
        records = perf_analyzer.run(perf_tags)
        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].value(), 3.6)
        self.assertEqual(records[1].value(), 0.001)

        # Test exception handling
        with self.assertRaisesRegex(
                expected_exception=TritonModelAnalyzerException,
                expected_regex="Running perf_analyzer with",
                msg="Expected TritonModelAnalyzerException"):
            self.perf_mock.raise_exception_on_run()
            _ = perf_analyzer.run(perf_tags)

        self.server.stop()
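Example #5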
    def _get_perf_analyzer_metrics(self,
                                   perf_config,
                                   perf_output_writer=None,
                                   perf_analyzer_env=None):
        """
        Gets the aggregated metrics from the perf_analyzer

        Parameters
        ----------
        perf_config : dict
            The keys are arguments to perf_analyzer; the values are their
            values
        perf_output_writer : OutputWriter
            Writer that writes the output from perf_analyzer to the output
            stream/file. If None, the output is not written
        perf_analyzer_env : dict
            A dict of name:value pairs for the environment variables with which
            perf_analyzer should be run.

        Raises
        ------
        TritonModelAnalyzerException
        """

        perf_analyzer = PerfAnalyzer(
            path=self._config.perf_analyzer_path,
            config=perf_config,
            max_retries=self._config.perf_analyzer_max_auto_adjusts,
            timeout=self._config.perf_analyzer_timeout,
            max_cpu_util=self._config.perf_analyzer_cpu_util)

        # If running with the C API, CUDA_VISIBLE_DEVICES needs to be set here
        if self._config.triton_launch_mode == 'c_api':
            perf_analyzer_env['CUDA_VISIBLE_DEVICES'] = ','.join(
                [gpu.device_uuid() for gpu in self._gpus])

        status = perf_analyzer.run(self._perf_metrics, env=perf_analyzer_env)

        if perf_output_writer:
            perf_output_writer.write(
                '============== Perf Analyzer Launched ==============\n '
                f'Command: perf_analyzer {perf_config.to_cli_string()} \n\n',
                append=True)
            if perf_analyzer.output():
                perf_output_writer.write(perf_analyzer.output() + '\n',
                                         append=True)

        # PerfAnalyzer run was not successful
        if status == 1:
            return 1

        perf_records = perf_analyzer.get_records()
        perf_record_aggregator = RecordAggregator()
        perf_record_aggregator.insert_all(perf_records)

        return perf_record_aggregator.aggregate()
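    # Shape of the perf_analyzer_env argument (illustrative values; the entries
    # below are assumptions, not taken from the original code):
    #
    #     perf_analyzer_env = {'LD_LIBRARY_PATH': '/opt/tritonserver/lib'}
    #     # In 'c_api' launch mode the method then adds, for example:
    #     # perf_analyzer_env['CUDA_VISIBLE_DEVICES'] = 'GPU-aaaa,GPU-bbbb'
Example #6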
    def test_run(self, requests_mock):
        # Now create a server config
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_client = PerfAnalyzer(path=PERF_BIN_PATH, config=self.config)

        self.server.start()
        requests_mock.get.return_value.status_code = 200
        self.server.wait_for_ready(num_retries=1)

        # Run perf analyzer
        throughput_record, latency_record = perf_client.run()
        self.perf_mock.assert_perf_analyzer_run_as(
            [PERF_BIN_PATH, '-m', TEST_MODEL_NAME])
        self.server.stop()

        # Test latency parsing
        test_latency_output = "Avg latency: 5000 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        _, latency_record = perf_client.run()
        self.assertEqual(latency_record.value(), 5000)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        throughput_record, _ = perf_client.run()
        self.assertEqual(throughput_record.value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\nAvg latency: 3.6 ms\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        throughput_record, latency_record = perf_client.run()
        self.assertEqual(throughput_record.value(), 0.001)
        self.assertEqual(latency_record.value(), 3.6)
Example #7
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        # Run perf analyzer with dummy metrics to check command parsing
        perf_metrics = [id]
        test_latency_output = "p99 latency: 5000 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_analyzer.run(perf_metrics)
        self.perf_mock.assert_perf_analyzer_run_as([
            PERF_BIN_PATH, '-m', TEST_MODEL_NAME, '--measurement-interval',
            str(self.config['measurement-interval'])
        ])

        # Test latency parsing
        test_latency_output = "p99 latency: 5000 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_latency_output)
        perf_metrics = [PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5)

        # Test throughput parsing
        test_throughput_output = "Throughput: 46.8 infer/sec\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_throughput_output)
        perf_metrics = [PerfThroughput]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test parsing for both
        test_both_output = "Throughput: 0.001 infer/sec\np99 latency: 3600 us\n\n\n\n"
        self.perf_mock.set_perf_analyzer_result_string(test_both_output)
        perf_metrics = [PerfThroughput, PerfLatency]
        perf_analyzer.run(perf_metrics)
        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].value(), 0.001)
        self.assertEqual(records[1].value(), 3.6)

        # Test that a failing perf_analyzer run reports a non-zero status
        self.perf_mock.set_perf_analyzer_return_code(1)
        self.assertEqual(perf_analyzer.run(perf_metrics), 1)
        self.server.stop()
Example #8
    def test_run(self):
        server_config = TritonServerConfig()
        server_config['model-repository'] = MODEL_REPOSITORY_PATH

        # Create server, client, PerfAnalyzer, and wait for server ready
        self.server = TritonServerFactory.create_server_local(
            path=TRITON_LOCAL_BIN_PATH, config=server_config, gpus=self.gpus)
        perf_analyzer = PerfAnalyzer(path=PERF_BIN_PATH,
                                     config=self.config,
                                     max_retries=10,
                                     timeout=100,
                                     max_cpu_util=50)
        self.client = TritonClientFactory.create_grpc_client(
            server_url=TEST_GRPC_URL)
        self.server.start()
        self.client.wait_for_server_ready(num_retries=1)

        pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait\n1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314"""
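        # The latency/time columns in the mocked CSV are in microseconds, while
        # the parsed records below report milliseconds (e.g. Avg latency
        # 5000 us -> 5, response wait 314 us -> 0.314); Inferences/Second is
        # used directly as throughput.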

        # Test avg latency parsing
        perf_metrics = [PerfLatencyAvg]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 5)

        # Test p90 latency parsing
        perf_metrics = [PerfLatencyP90]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.7)

        # Test p95 latency parsing
        perf_metrics = [PerfLatencyP95]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.8)

        # Test p99 latency parsing
        perf_metrics = [PerfLatencyP99]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 4.9)

        # Test throughput parsing
        perf_metrics = [PerfThroughput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 46.8)

        # Test client response wait
        perf_metrics = [PerfClientResponseWait]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.314)

        # Test server queue
        perf_metrics = [PerfServerQueue]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.018)

        # Test server compute infer
        perf_metrics = [PerfServerComputeInfer]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.065)

        # Test server compute input
        perf_metrics = [PerfServerComputeInput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.034)

        # Test server compute output
        perf_metrics = [PerfServerComputeOutput]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].value(), 0.016)

        # Test parsing for a subset of metrics
        perf_metrics = [
            PerfThroughput, PerfLatencyAvg, PerfLatencyP90, PerfLatencyP95,
            PerfLatencyP99
        ]

        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            perf_analyzer.run(perf_metrics)

        records = perf_analyzer.get_records()
        self.assertEqual(len(records), 5)

        # Test no exceptions are raised when nothing can be parsed
        pa_csv_empty = ""
        perf_metrics = [
            PerfThroughput, PerfClientSendRecv, PerfClientResponseWait,
            PerfServerQueue, PerfServerComputeInfer, PerfServerComputeInput,
            PerfServerComputeOutput
        ]
        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_empty)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            self.assertFalse(perf_analyzer.run(perf_metrics))

        # Test that a failing perf_analyzer run reports a non-zero status
        self.perf_mock.set_perf_analyzer_return_code(1)
        with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
                   mock_open(read_data=pa_csv_mock)), patch(
                       'model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
            self.assertTrue(perf_analyzer.run(perf_metrics))
        self.server.stop()
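    # The repeated patch/run/get_records sequence above could be factored into
    # a small helper; a minimal sketch (hypothetical, not part of the original
    # test):
    #
    #     def _run_with_mock_csv(self, perf_analyzer, perf_metrics, csv_data):
    #         with patch('model_analyzer.perf_analyzer.perf_analyzer.open',
    #                    mock_open(read_data=csv_data)), \
    #              patch('model_analyzer.perf_analyzer.perf_analyzer.os.remove'):
    #             perf_analyzer.run(perf_metrics)
    #         return perf_analyzer.get_records()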