Example #1
0
    def run(self):
        """Run the open-loop latency/throughput benchmark for every
        (vertex_config, arrival_config) pair in ``self.config``.

        For each pair this: boots the serve runtime, builds the image
        preprocessing pipeline, measures throughput in-process, then drives
        the pipeline over HTTP with an external Go client following a
        generated arrival curve, collects per-request latencies from the
        trace file the proxy writes, and appends one row to ``self._df``.
        """
        for vertex_config in self.config["vertex_configs"]:
            for arrival_config in self.config["arrival_config"]:

                # Fresh serve runtime per experiment so runs don't share state.
                serve_reference.init(start_server=False)
                # Trace file the HTTP proxy actor writes request latencies to
                # (deleted at the end of each iteration).
                filename_query = "arrival_trace.jsonl"
                route = "/prepoc"

                pipeline = ImagePrepocPipeline(vertex_config,
                                               self.config["model_type"])
                # Serialize the config so it fits in a single DataFrame cell.
                vertex_config_name = json.dumps(vertex_config)
                df_row = dict(
                    vertex_config=vertex_config_name,
                    serving_type=self.config["serving_type"],
                    arrival_process=json.dumps(arrival_config),
                )

                image_path = os.path.join(ROOT_DIR,
                                          self.config["image_file_path"])

                throughput_qps = self._throughput_calculation(
                    pipeline, image_path, arrival_config["num_requests"])
                df_row.update(throughput_qps=throughput_qps)

                pprint(df_row)

                # HTTP front end that forwards requests to the pipeline and
                # records per-request latency into ``filename_query``.
                http_actor = HTTPProxyActor.remote(
                    host="127.0.0.1",
                    port=8000,
                    serving_backend=self.config["serving_type"],
                    filename=filename_query,
                )
                # Block until the route is registered before launching the
                # client; otherwise early requests could 404.
                ray.get(
                    http_actor.register_route.remote(route,
                                                     pipeline.chain_handle))
                go_client_path = os.path.join(ROOT_DIR,
                                              self.config["client_path"])

                # Inter-arrival schedule; presumably seconds-per-request —
                # TODO(review): confirm units against the Go client.
                arrival_curve = generate_fixed_arrival_process(
                    **arrival_config).tolist()
                arrival_curve_str = [str(x) for x in arrival_curve]
                # The Go load generator takes the image, the route, and the
                # arrival curve as positional CLI arguments.
                ls_output = subprocess.Popen([
                    "go",
                    "run",
                    go_client_path,
                    image_path,
                    route,
                    *arrival_curve_str,
                ])
                # Wait for the client to finish sending the whole curve.
                ls_output.communicate()

                latency_s = get_latency(filename_query)
                os.remove(filename_query)

                df_row.update(latency_s=latency_s)
                self._df = self._df.append(df_row, ignore_index=True)

                # cleanup
                del latency_s, pipeline, arrival_curve, arrival_curve_str
                serve_reference.shutdown()
Example #2
0
    def run(self):
        """Benchmark the Chain pipeline over every combination of batch size
        and pipeline length, recording throughput and closed-loop latencies
        into ``self._df`` (one row per combination).
        """
        payload = construct_tensor(self.config)

        combos = product(self.config["max_batch_sizes"],
                         self.config["pipeline_lengths"])
        for batch_size, pipeline_length in combos:
            row = {
                "batch_size": batch_size,
                "pipeline_length": pipeline_length,
                "tensor_type": self.config["tensor_type"],
                "tensor_shape": "x".join(
                    str(dim) for dim in self.config["tensor_shape"]),
                "serving_type": self.config["serving_type"],
                "arrival_process": self.config["arrival_process"],
            }

            # Fresh serve runtime for each configuration.
            serve_reference.init(start_server=False)

            chain = Chain(max_batch_size=batch_size,
                          pipeline_length=pipeline_length)

            # Warm up with 200 requests before taking any measurements.
            warmup_refs, _ = ray.wait(
                [chain.remote(payload) for _ in range(200)], 200)
            ray.wait(warmup_refs, num_returns=200)
            del warmup_refs

            row["throughput_qps"] = self._throughput_calculation(
                chain, payload)

            serve_reference.clear_trace()

            # Closed-loop latency: issue one request at a time and time the
            # full round trip.
            latencies = []
            for _ in range(self.config["num_requests"]):
                began = time.perf_counter()
                pending, _ = ray.wait([chain.remote(payload)], 1)
                ray.wait(pending, 1)
                latencies.append(time.perf_counter() - began)

            pprint(row)
            row["latency_s"] = latencies

            self._df = self._df.append(row, ignore_index=True)

            # cleanup
            del latencies, chain
            serve_reference.shutdown()
Example #3
0
def main():
    """Measure Resnet18 serving throughput for 1-8 replicas.

    For each replica count this boots the serve runtime, registers a
    batched (max_batch_size=8) GPU backend, warms it up with 200 requests,
    then prints the measured QPS before shutting the runtime down.
    """
    TAG = "Resnet18"
    min_img_size = 224
    transform = transforms.Compose([
        transforms.Resize(min_img_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    # Read the request payload once: the bytes are loop-invariant, and the
    # context manager closes the file handle (the original re-read the file
    # on every iteration and never closed it).
    with open("elephant.jpg", "rb") as image_file:
        img = base64.b64encode(image_file.read())

    for num_replica in range(1, 9):
        # initialize serve
        serve_reference.init(start_server=False)

        serve_handle = None
        with serve_reference.using_router(TAG):
            serve_reference.create_endpoint(TAG)
            config = serve_reference.BackendConfig(max_batch_size=8,
                                                   num_replicas=num_replica,
                                                   num_gpus=1)
            serve_reference.create_backend(
                PredictModelPytorch,
                TAG,
                transform,
                "resnet18",
                True,
                backend_config=config,
            )
            serve_reference.link(TAG, TAG)
            serve_handle = serve_reference.get_handle(TAG)

        # warmup: issue 200 requests and wait for all of them to complete
        # before measuring, so model load / JIT cost isn't counted.
        ready_refs, _ = ray.wait(
            [serve_handle.remote(data=img) for _ in range(200)], 200)
        complete_oids, _ = ray.wait(ray.get(ready_refs), num_returns=200)
        del ready_refs
        del complete_oids

        qps = throughput_calculation(serve_handle, {"data": img}, 2000)
        print(f"[Resnet18] Batch Size: 8 Replica: {num_replica} "
              f"Throughput: {qps} QPS")

        serve_reference.shutdown()
Example #4
0
    def run(self):
        """Benchmark the image preprocessing pipeline for every vertex
        config: measure throughput, then closed-loop per-request latency,
        appending one row per config to ``self._df``.
        """
        for vertex_config in self.config["vertex_configs"]:

            # Fresh serve runtime per experiment so runs don't share state.
            serve_reference.init(start_server=False)
            filename_query = "arrival_trace.jsonl"
            route = "/prepoc"

            pipeline = ImagePrepocPipeline(vertex_config,
                                           self.config["model_type"])
            # Serialize the config so it fits in a single DataFrame cell.
            vertex_config_name = json.dumps(vertex_config)
            df_row = dict(
                vertex_config=vertex_config_name,
                serving_type=self.config["serving_type"],
                arrival_process=self.config["arrival_process"],
            )

            image_path = os.path.join(ROOT_DIR, self.config["image_file_path"])
            # Context manager closes the image file handle (the original
            # left it open).
            with open(image_path, "rb") as image_file:
                tensor_data = base64.b64encode(image_file.read())

            throughput_qps = self._throughput_calculation(
                pipeline, tensor_data, self.config["num_requests"])
            df_row.update(throughput_qps=throughput_qps)

            pprint(df_row)

            # closed loop latency calculation: one request in flight at a
            # time, timing the full round trip.
            closed_loop_latencies = list()
            for _ in range(self.config["num_requests"]):
                start_time = time.perf_counter()
                ready, _ = ray.wait([pipeline.remote(tensor_data)], 1)
                ray.wait(ready, 1)
                end_time = time.perf_counter()
                latency = end_time - start_time
                closed_loop_latencies.append(latency)

            df_row.update(latency_s=closed_loop_latencies)

            self._df = self._df.append(df_row, ignore_index=True)

            # cleanup
            del closed_loop_latencies, pipeline
            serve_reference.shutdown()