def push(self, request):
    # Convert input format
    request = request_wrapper.protoToDict(
        request, self.input_shapes, stack=STACK_CHANNELS)
    # Send to FPGA: round-robin over the available hardware streams
    in_slot = self.in_index % self.n_streams
    self.fpgaRT.exec_async(request, self.output_buffers[in_slot], in_slot)
    self.in_index += 1
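# A matching drain step is not shown above; a minimal sketch is given below.
# It assumes the ml-suite style blocking call fpgaRT.get_result(stream_id)
# and an out_index counter mirroring in_index, neither of which is confirmed
# by this code.
def pop(self):
    # Pick the stream whose oldest async job we should wait for
    out_slot = self.out_index % self.n_streams
    self.fpgaRT.get_result(out_slot)  # block until that stream's job is done
    self.out_index += 1
    return self.output_buffers[out_slot]  # raw FPGA outputs for this slot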
def imagenet_client(file_name, n, print_interval=50):
    print("Sending {n} ImageNet images using batch size {batch_size}...".format(
        n=n, batch_size=BATCH_SIZE))
    assert n % BATCH_SIZE == 0

    start_time = time.time()
    requests = list(imagenet_request_generator(file_name, n))
    total_time = time.time() - start_time
    print("Image load time: {time:.2f}".format(time=total_time))

    start_time = time.time()
    predictions = []

    # Connect to server
    with grpc.insecure_channel('{address}:{port}'.format(
            address=SERVER_ADDRESS, port=SERVER_PORT)) as channel:
        stub = inference_server_pb2_grpc.InferenceStub(channel)

        # Make a call: stream all requests to the server
        def it():
            for request in requests:
                yield request

        responses = stub.Inference(it())

        # Get responses
        for i, response in enumerate(responses):
            if i % print_interval == 0:
                print(i)
            response = request_wrapper.protoToDict(
                response, {OUTPUT_NODE_NAME: (BATCH_SIZE, 1000)})
            prediction = np.argmax(response[OUTPUT_NODE_NAME], axis=1)
            predictions.append(prediction)

    total_time = time.time() - start_time
    print("Sent {n} images in {time:.3f} seconds ({speed:.3f} images/s), "
          "excluding image load time".format(
              n=n, time=total_time, speed=float(n) / total_time))

    labels = list(imagenet_label_generator(file_name, n))
    predictions = np.array(predictions).reshape((-1))
    labels = np.array(labels).reshape((-1))
    print("Accuracy: {acc:.4}".format(
        acc=metrics.accuracy_score(labels, predictions)))
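# A hypothetical invocation of the client above. The file name and image
# count are placeholders; SERVER_ADDRESS, SERVER_PORT and BATCH_SIZE are
# assumed to be module-level constants defined elsewhere in the project.
if __name__ == '__main__':
    # n must be a multiple of BATCH_SIZE in this version of the client
    imagenet_client('imagenet_val.txt', n=1000, print_interval=50)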
def process_inference(request):
    input_dict = request_wrapper.protoToDict(request)
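# The handler above is truncated; a minimal sketch of how it might continue
# is given below. run_model() and request_wrapper.dictToProto() are
# hypothetical placeholders, not APIs confirmed by this code; the real
# handler presumably feeds the FPGA pipeline shown earlier.
def process_inference_sketch(request):
    input_dict = request_wrapper.protoToDict(request)  # proto -> dict of numpy arrays
    output_dict = run_model(input_dict)                # hypothetical model call
    return request_wrapper.dictToProto(output_dict)    # hypothetical inverse conversion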
def imagenet_client(file_name, n, print_interval=50):
    print("Sending {n} ImageNet images using batch size {batch_size}...".format(
        n=n, batch_size=BATCH_SIZE))
    # n no longer has to be a multiple of BATCH_SIZE: a partial final batch
    # is padded out to a full batch below.
    remainder = n % BATCH_SIZE

    if remainder == 0:
        start_time = time.time()
        requests = list(imagenet_request_generator(file_name, n))
        total_time = time.time() - start_time
        print("Image load time: {time:.2f}".format(time=total_time))

        start_time = time.time()
        predictions = []

        # Connect to server
        with grpc.insecure_channel('{address}:{port}'.format(
                address=SERVER_ADDRESS, port=SERVER_PORT)) as channel:
            stub = inference_server_pb2_grpc.InferenceStub(channel)

            # Make a call: stream all requests to the server
            def it():
                for request in requests:
                    yield request

            responses = stub.Inference(it())

            # Get responses
            for i, response in enumerate(responses):
                if i % print_interval == 0:
                    print(i)
                response = request_wrapper.protoToDict(
                    response, {OUTPUT_NODE_NAME: (BATCH_SIZE, 1000)})
                prediction = np.argmax(response[OUTPUT_NODE_NAME], axis=1)
                predictions.append(prediction)

        total_time = time.time() - start_time
        print("Sent {n} images in {time:.3f} seconds ({speed:.3f} images/s), "
              "excluding image load time".format(
                  n=n, time=total_time, speed=float(n) / total_time))

        labels = list(imagenet_label_generator(file_name, n))
        predictions = np.array(predictions).reshape((-1))
        labels = np.array(labels).reshape((-1))
        print("Accuracy: {acc:.4}".format(
            acc=metrics.accuracy_score(labels, predictions)))
        return total_time, float(n) / total_time, metrics.accuracy_score(
            labels, predictions)
    else:
        main_part = n - remainder
        extra_part = BATCH_SIZE
        print("main part: {0}, extra part: {1}".format(main_part, extra_part))

        start_time = time.time()
        requests_main = list(imagenet_request_generator(file_name, main_part))
        time_main = time.time() - start_time
        time_sub = time.time()
        # The extra batch re-reads the first BATCH_SIZE images to pad the
        # remainder out to a full batch; only the fraction of its load time
        # corresponding to the wanted images is counted.
        requests_sub = list(imagenet_request_generator(file_name, extra_part))
        time_sub = (time.time() - time_sub) * (float(remainder) / extra_part)
        total_time = time_main + time_sub
        print("Image load time: {time:.2f}".format(time=total_time))

        start_time = time.time()
        predictions = []

        # Make a call: stream the given requests to the server
        def it(requests):
            for request in requests:
                yield request

        # Connect to server and send the main (full-batch) part
        with grpc.insecure_channel('{address}:{port}'.format(
                address=SERVER_ADDRESS, port=SERVER_PORT)) as channel:
            stub = inference_server_pb2_grpc.InferenceStub(channel)
            responses_main = stub.Inference(it(requests_main))

            # Get responses
            for i, response in enumerate(responses_main):
                if i % print_interval == 0:
                    print(i)
                response = request_wrapper.protoToDict(
                    response, {OUTPUT_NODE_NAME: (BATCH_SIZE, 1000)})
                prediction = np.argmax(response[OUTPUT_NODE_NAME], axis=1)
                predictions.append(prediction)
        time_main = time.time() - start_time

        start_time = time.time()
        # Send the padded extra batch on a fresh channel
        with grpc.insecure_channel('{address}:{port}'.format(
                address=SERVER_ADDRESS, port=SERVER_PORT)) as channel:
            stub = inference_server_pb2_grpc.InferenceStub(channel)
            responses_sub = stub.Inference(it(requests_sub))

            # Get responses
            for i, response in enumerate(responses_sub):
                if i % print_interval == 0:
                    print(i)
                response = request_wrapper.protoToDict(
                    response, {OUTPUT_NODE_NAME: (BATCH_SIZE, 1000)})
                prediction = np.argmax(response[OUTPUT_NODE_NAME], axis=1)
                predictions.append(prediction)
        time_sub = time.time() - start_time
        total_time = time_sub * (float(remainder) / extra_part) + time_main

        # Drop the padding predictions so that exactly n remain; the first
        # np.delete call flattens the list of per-batch arrays.
        for i in range(1, extra_part - remainder + 1):
            predictions = np.delete(predictions, main_part + extra_part - i)

        print("Sent {n} images ({extra} of them padding to fill the last "
              "batch) in {time:.3f} seconds ({speed:.3f} images/s), excluding "
              "image load time".format(
                  n=main_part + extra_part,
                  extra=BATCH_SIZE - remainder,
                  time=total_time,
                  speed=float(n) / total_time))

        labels = list(imagenet_label_generator(file_name, n))
        predictions = np.array(predictions).reshape((-1))
        labels = np.array(labels).reshape((-1))
        # Note: the padded batch re-reads the first BATCH_SIZE images of the
        # file, so the trailing `remainder` predictions are compared against
        # labels of different images; the reported accuracy is approximate
        # whenever remainder > 0.
        print("Accuracy: {acc:.4}".format(
            acc=metrics.accuracy_score(labels, predictions)))
        return total_time, float(n) / total_time, metrics.accuracy_score(
            labels, predictions)
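# For reference, the delete loop above can be replaced by a single slice once
# the per-batch arrays are flattened; a sketch with the same behavior, which
# avoids the repeated np.delete calls:
import numpy as np

def trim_padding(predictions, n):
    # Keep exactly the first n predictions, dropping the padding tail.
    return np.concatenate(predictions)[:n]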