def run(self):
    for vertex_config in self.config["vertex_configs"]:
        for arrival_config in self.config["arrival_config"]:
            serve_reference.init(start_server=False)

            filename_query = "arrival_trace.jsonl"
            route = "/prepoc"
            pipeline = ImagePrepocPipeline(vertex_config,
                                           self.config["model_type"])
            vertex_config_name = json.dumps(vertex_config)
            df_row = dict(
                vertex_config=vertex_config_name,
                serving_type=self.config["serving_type"],
                arrival_process=json.dumps(arrival_config),
            )

            image_path = os.path.join(ROOT_DIR,
                                      self.config["image_file_path"])
            throughput_qps = self._throughput_calculation(
                pipeline, image_path, arrival_config["num_requests"])
            df_row.update(throughput_qps=throughput_qps)
            pprint(df_row)

            # expose the pipeline over HTTP so the Go client can drive it
            http_actor = HTTPProxyActor.remote(
                host="127.0.0.1",
                port=8000,
                serving_backend=self.config["serving_type"],
                filename=filename_query,
            )
            ray.get(
                http_actor.register_route.remote(route,
                                                 pipeline.chain_handle))

            # replay the generated arrival curve through the Go client,
            # which logs per-request timings to filename_query
            go_client_path = os.path.join(ROOT_DIR,
                                          self.config["client_path"])
            arrival_curve = generate_fixed_arrival_process(
                **arrival_config).tolist()
            arrival_curve_str = [str(x) for x in arrival_curve]
            client_proc = subprocess.Popen([
                "go",
                "run",
                go_client_path,
                image_path,
                route,
                *arrival_curve_str,
            ])
            client_proc.communicate()

            latency_s = get_latency(filename_query)
            os.remove(filename_query)
            df_row.update(latency_s=latency_s)
            self._df = self._df.append(df_row, ignore_index=True)

            # cleanup
            del latency_s, pipeline, arrival_curve, arrival_curve_str
            serve_reference.shutdown()
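
# `get_latency` is defined elsewhere in the benchmark suite. A minimal
# sketch of what it might do, assuming the Go client appends one JSON
# record per request to the trace file (the "start"/"end" field names
# are hypothetical):
import json


def get_latency(filename):
    """Parse per-request latencies (in seconds) from a JSONL trace."""
    latencies = []
    with open(filename) as f:
        for line in f:
            record = json.loads(line)
            latencies.append(record["end"] - record["start"])
    return latencies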
def run(self):
    tensor_data = construct_tensor(self.config)
    for batch_size, pipeline_length in product(
            self.config["max_batch_sizes"], self.config["pipeline_lengths"]):
        df_row = dict(
            batch_size=batch_size,
            pipeline_length=pipeline_length,
            tensor_type=self.config["tensor_type"],
            tensor_shape="x".join(
                [str(shape) for shape in self.config["tensor_shape"]]),
            serving_type=self.config["serving_type"],
            arrival_process=self.config["arrival_process"],
        )

        # initialize serve
        serve_reference.init(start_server=False)
        chain_pipeline = Chain(max_batch_size=batch_size,
                               pipeline_length=pipeline_length)

        # warmup
        ready_refs, _ = ray.wait(
            [chain_pipeline.remote(tensor_data) for _ in range(200)],
            num_returns=200)
        ray.wait(ready_refs, num_returns=200)
        del ready_refs

        qps = self._throughput_calculation(chain_pipeline, tensor_data)
        df_row.update(throughput_qps=qps)
        serve_reference.clear_trace()

        # closed loop latency calculation
        closed_loop_latencies = list()
        for _ in range(self.config["num_requests"]):
            start_time = time.perf_counter()
            ready, _ = ray.wait([chain_pipeline.remote(tensor_data)],
                                num_returns=1)
            ray.wait(ready, num_returns=1)
            end_time = time.perf_counter()
            closed_loop_latencies.append(end_time - start_time)

        pprint(df_row)
        df_row.update(latency_s=closed_loop_latencies)
        self._df = self._df.append(df_row, ignore_index=True)

        # cleanup
        del closed_loop_latencies, chain_pipeline
        serve_reference.shutdown()
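
# `_throughput_calculation` (the method used above) and the module-level
# `throughput_calculation` used by the Resnet18 driver below are defined
# elsewhere in the suite. A minimal sketch of the usual pattern, assuming
# they submit a fixed number of requests up front and divide by wall-clock
# time (the body and the default request count are assumptions):
import time

import ray


def throughput_calculation(handle, data, num_requests=2000):
    """Fire num_requests at once; return completed requests per second."""
    start_time = time.perf_counter()
    refs = [handle.remote(data) for _ in range(num_requests)]
    ray.wait(refs, num_returns=num_requests)
    end_time = time.perf_counter()
    return num_requests / (end_time - start_time)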
def main():
    TAG = "Resnet18"
    min_img_size = 224
    transform = transforms.Compose([
        transforms.Resize(min_img_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    for num_replica in range(1, 9):
        # initialize serve
        serve_reference.init(start_server=False)

        serve_handle = None
        with serve_reference.using_router(TAG):
            serve_reference.create_endpoint(TAG)
            config = serve_reference.BackendConfig(max_batch_size=8,
                                                   num_replicas=num_replica,
                                                   num_gpus=1)
            serve_reference.create_backend(
                PredictModelPytorch,
                TAG,
                transform,
                "resnet18",
                True,
                backend_config=config,
            )
            serve_reference.link(TAG, TAG)
            serve_handle = serve_reference.get_handle(TAG)

        with open("elephant.jpg", "rb") as f:
            img = base64.b64encode(f.read())

        # warmup
        ready_refs, _ = ray.wait(
            [serve_handle.remote(data=img) for _ in range(200)],
            num_returns=200)
        complete_oids, _ = ray.wait(ray.get(ready_refs), num_returns=200)
        del ready_refs
        del complete_oids

        qps = throughput_calculation(serve_handle, {"data": img}, 2000)
        print(f"[Resnet18] Batch Size: 8 Replica: {num_replica} "
              f"Throughput: {qps} QPS")

        serve_reference.shutdown()
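
# `PredictModelPytorch` is not shown in this snippet. A plausible sketch,
# assuming its constructor arguments above are (transform, model_name,
# is_cuda) and that requests carry base64-encoded image bytes under the
# `data` keyword; the call signature and field handling are assumptions:
import base64
import io

import torch
import torchvision.models as models
from PIL import Image


class PredictModelPytorch:
    def __init__(self, transform, model_name, is_cuda):
        self.transform = transform
        self.model = models.__dict__[model_name](pretrained=True)
        self.device = "cuda" if is_cuda else "cpu"
        self.model = self.model.to(self.device).eval()

    def __call__(self, flask_request, data):
        # Decode the base64 payload back into an image and run inference.
        img = Image.open(io.BytesIO(base64.b64decode(data))).convert("RGB")
        batch = self.transform(img).unsqueeze(0).to(self.device)
        with torch.no_grad():
            output = self.model(batch)
        return output.argmax(dim=1).item()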
def run(self):
    for vertex_config in self.config["vertex_configs"]:
        serve_reference.init(start_server=False)

        filename_query = "arrival_trace.jsonl"
        route = "/prepoc"
        pipeline = ImagePrepocPipeline(vertex_config,
                                       self.config["model_type"])
        vertex_config_name = json.dumps(vertex_config)
        df_row = dict(
            vertex_config=vertex_config_name,
            serving_type=self.config["serving_type"],
            arrival_process=self.config["arrival_process"],
        )

        image_path = os.path.join(ROOT_DIR, self.config["image_file_path"])
        with open(image_path, "rb") as f:
            tensor_data = base64.b64encode(f.read())
        throughput_qps = self._throughput_calculation(
            pipeline, tensor_data, self.config["num_requests"])
        df_row.update(throughput_qps=throughput_qps)
        pprint(df_row)

        # closed loop latency calculation
        closed_loop_latencies = list()
        for _ in range(self.config["num_requests"]):
            start_time = time.perf_counter()
            ready, _ = ray.wait([pipeline.remote(tensor_data)],
                                num_returns=1)
            ray.wait(ready, num_returns=1)
            end_time = time.perf_counter()
            closed_loop_latencies.append(end_time - start_time)

        df_row.update(latency_s=closed_loop_latencies)
        self._df = self._df.append(df_row, ignore_index=True)

        # cleanup
        del closed_loop_latencies, pipeline
        serve_reference.shutdown()
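
# The rows above store the raw per-request latency list in `latency_s`.
# A small helper one might use downstream to reduce those lists to tail
# percentiles (hypothetical; not part of the original benchmark):
import numpy as np


def latency_percentiles(latencies, percentiles=(50, 95, 99)):
    """Reduce a list of per-request latencies (seconds) to percentiles."""
    return {f"p{p}": float(np.percentile(latencies, p)) for p in percentiles}


# Example: latency_percentiles([0.011, 0.012, 0.095])["p50"] == 0.012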