def test_update_dynamic_profiling_result():
    model = ModelService.get_models('ResNet50')[0]
    dummy_info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1)
    updated_info_tuple = InfoTuple(avg=1, p50=2, p95=1, p99=1)
    dpr = DynamicProfileResultBO(
        device_id='gpu:01',
        device_name='Tesla K40c',
        batch=1,
        memory=ProfileMemory(1000, 2000, 0.5),
        latency=ProfileLatency(
            init_latency=dummy_info_tuple,
            preprocess_latency=dummy_info_tuple,
            inference_latency=updated_info_tuple,
            postprocess_latency=dummy_info_tuple,
        ),
        throughput=ProfileThroughput(
            batch_formation_throughput=1,
            preprocess_throughput=1,
            inference_throughput=1,
            postprocess_throughput=1,
        ),
    )

    # check update
    assert ModelService.update_dynamic_profiling_result(model.id, dpr)

    # check result
    model = ModelService.get_models('ResNet50')[0]
    assert model.profile_result.dynamic_results[0].memory.memory_usage == 2000
    assert model.profile_result.dynamic_results[0].latency.inference_latency.p50 == 2
def diagnose(self, batch_size: int = None, device='cuda', timeout=30) -> DynamicProfileResultBO:
    """Start diagnosing and profiling the model.

    Args:
        batch_size (int): Batch size.
        device (str): Device name.
        timeout (float): Timeout in seconds to wait for the Docker container.
            Defaults to 30 s.
    """
    model_status = False
    retry_time = 0
    # wait for the model to be served, using binary exponential backoff
    # between status checks
    tick = time.time()
    while time.time() - tick < timeout:
        if self.inspector.check_model_status():
            model_status = True
            break
        retry_time += 1
        # get backoff time in seconds
        backoff_time = random.randint(0, 2 ** retry_time - 1) * 1e-3
        time.sleep(backoff_time)
    if not model_status:
        # the model was not served within the timeout
        raise ServiceException('Model not served!')

    if batch_size is not None:
        self.inspector.set_batch_size(batch_size)
    result = self.inspector.run_model(server_name=self.server_name, device=device)
    dpr_bo = DynamicProfileResultBO(
        ip=get_ip(),
        device_id=result['device_id'],
        device_name=result['device_name'],
        batch=result['batch_size'],
        memory=ProfileMemory(
            total_memory=result['total_gpu_memory'],
            memory_usage=result['gpu_memory_used'],
            utilization=result['gpu_utilization'],
        ),
        latency=ProfileLatency(inference_latency=result['latency']),
        throughput=ProfileThroughput(inference_throughput=result['total_throughput']),
        create_time=result['completed_time'],
    )
    return dpr_bo
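For reference, the wait loop in `diagnose` is plain binary exponential backoff. Below is a minimal standalone sketch of the same pattern; the helper name `wait_until` and the generic `check` callable (standing in for `self.inspector.check_model_status`) are ours, not from the source.

import random
import time


def wait_until(check, timeout=30.0):
    """Poll `check()` with binary exponential backoff until it returns True
    or `timeout` seconds elapse. Returns True on success, False on timeout.
    """
    retry_time = 0
    tick = time.time()
    while time.time() - tick < timeout:
        if check():
            return True
        retry_time += 1
        # sleep between 0 and (2**retry_time - 1) milliseconds
        time.sleep(random.randint(0, 2 ** retry_time - 1) * 1e-3)
    return False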
def test_register_dynamic_profiling_result():
    model = ModelService.get_models('ResNet50')[0]
    dummy_info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1)
    dpr = DynamicProfileResultBO(
        device_id='gpu:01',
        device_name='Tesla K40c',
        batch=1,
        memory=ProfileMemory(1000, 1000, 0.5),
        latency=ProfileLatency(
            init_latency=dummy_info_tuple,
            preprocess_latency=dummy_info_tuple,
            inference_latency=dummy_info_tuple,
            postprocess_latency=dummy_info_tuple,
        ),
        throughput=ProfileThroughput(
            batch_formation_throughput=1,
            preprocess_throughput=1,
            inference_throughput=1,
            postprocess_throughput=1,
        ),
    )

    assert ModelService.append_dynamic_profiling_result(model.id, dpr)
def test_delete_dynamic_profiling_result():
    model = ModelService.get_models('ResNet50')[0]
    dummy_info_tuple1 = InfoTuple(avg=1, p50=1, p95=1, p99=2)
    dummy_info_tuple2 = InfoTuple(avg=1, p50=1, p95=1, p99=1)
    dpr = DynamicProfileResultBO(
        device_id='gpu:02',
        device_name='Tesla K40c',
        batch=1,
        memory=ProfileMemory(1000, 1000, 0.5),
        latency=ProfileLatency(
            init_latency=dummy_info_tuple1,
            preprocess_latency=dummy_info_tuple2,
            inference_latency=dummy_info_tuple2,
            postprocess_latency=dummy_info_tuple2,
        ),
        throughput=ProfileThroughput(
            batch_formation_throughput=1,
            preprocess_throughput=1,
            inference_throughput=1,
            postprocess_throughput=1,
        ),
    )
    ModelService.append_dynamic_profiling_result(model.id, dpr)

    # reload
    model = ModelService.get_models('ResNet50')[0]
    dpr_bo = model.profile_result.dynamic_results[0]
    dpr_bo2 = model.profile_result.dynamic_results[1]

    # check delete
    assert ModelService.delete_dynamic_profiling_result(model.id, dpr_bo.ip, dpr_bo.device_id)

    # check result
    model = ModelService.get_models('ResNet50')[0]
    assert len(model.profile_result.dynamic_results) == 1
    dpr_left = model.profile_result.dynamic_results[0]
    assert dpr_bo2.latency.init_latency.avg == dpr_left.latency.init_latency.avg