def main(args):
    input_file_path = args.input_image
    serialized_plan_fp32 = args.engine_file
    HEIGHT = args.height
    WIDTH = args.width

    # Load the image and preprocess it: resize, float32 CHW layout, mean subtraction.
    image = np.asarray(Image.open(input_file_path))
    img = rescale_image(image, (HEIGHT, WIDTH), order=1)
    im = np.array(img, dtype=np.float32, order='C')
    im = im.transpose((2, 0, 1))
    im = sub_mean_chw(im)

    # Run the serialized TensorRT engine on the preprocessed image.
    engine = eng.load_engine(trt_runtime, serialized_plan_fp32)
    h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(
        engine, 1, trt.float32)
    out = inf.do_inference(engine, im, h_input, d_input, h_output, d_output,
                           stream, 1, HEIGHT, WIDTH)
    out = color_map(out)

    colorImage_trt = Image.fromarray(out.astype(np.uint8))
    colorImage_trt.save('trt_output.png')

    # Run the same input through the original Keras model for comparison.
    semantic_model = keras.models.load_model(args.hdf5_file)
    out_keras = semantic_model.predict(im.reshape(-1, 3, HEIGHT, WIDTH))
    out_keras = color_map(out_keras)

    colorImage_k = Image.fromarray(out_keras.astype(np.uint8))
    colorImage_k.save('keras_output.png')
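# Assumed helper (not shown in these snippets): a minimal sketch of what the
# `load_engine` function in the `engine` module (imported as `eng`) could look
# like. The name and call signature come from the snippets here; the body is an
# assumption based on the standard TensorRT Python API.
def load_engine(trt_runtime, plan_path):
    # Read the serialized engine plan from disk and deserialize it into an
    # ICudaEngine from which execution contexts can be created.
    with open(plan_path, 'rb') as f:
        engine_data = f.read()
    return trt_runtime.deserialize_cuda_engine(engine_data)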
def main(args):
    serialized_plan_fp32 = args.engine_file

    print("[INFO] Loading Engine...")
    engine = eng.load_engine(trt_runtime, serialized_plan_fp32)

    print("[INFO] Allocating Buffers...")
    print("[INFO] Applying Inference...")
    disp_tensors_pred = []
    disp_tensors_gt = []
    for i in range(config.NUM_VAL // config.batch_size):
        (force_tensor, disp_tensor_gt) = next(gen)
        # Allocate buffers for one batch of the FP16 engine.
        h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(
            engine, config.batch_size, trt.float16)
        start = time.time()
        TensorRT_pred = inf.do_inference(engine, force_tensor, h_input, d_input,
                                         h_output, d_output, stream,
                                         config.batch_size)
        end = time.time()
        print("inference time including buffer copy", end - start)
        print("TensorRT_pred", TensorRT_pred.shape)
        disp_tensors_pred.append(TensorRT_pred)
        disp_tensors_gt.append(disp_tensor_gt)

    disp_tensors_pred = np.asarray(disp_tensors_pred).reshape(
        -1, config.data_shape[0], config.data_shape[1],
        config.data_shape[2], config.data_shape[3])
    disp_tensors_gt = np.asarray(disp_tensors_gt).reshape(
        -1, config.data_shape[0], config.data_shape[1],
        config.data_shape[2], config.data_shape[3])
    print(disp_tensors_pred.shape)

    # Visualize the predicted vs. ground-truth displacement tensors.
    Visualize.gen_video(disp_tensors_pred, disp_tensors_gt, config)
def load_trt():
    # Load the serialized TensorRT engine and time the deserialization.
    load_tensorrt = timer("Load TRT Engine")
    trt_path = 'alexnet.trt'
    engine = load_engine(trt_runtime, trt_path)
    load_tensorrt.end()
    return engine
def __init__(self, label_file, model_file):
    self.labels = self.load_labels(label_file)
    self.engine = eng.load_engine(trt_runtime, model_file)
    self.h_input, self.d_input, self.h_output, self.d_output, self.stream = \
        inf.allocate_buffers(self.engine, 1, trt.float32)
    self.context = self.engine.create_execution_context()
    self.width = 224
    self.height = 224
def __init__(self, trt_engine_path, trt_engine_datatype=trt.DataType.FLOAT,
             batch_size=1):
    """Initializes TensorRT objects needed for model inference.

    Args:
        trt_engine_path (str): path where the TensorRT engine is stored
        trt_engine_datatype (trt.DataType): requested precision of the TensorRT
            engine used for inference
        batch_size (int): batch size for which the engine should be optimized
    """
    # Load all custom plugins shipped with TensorRT;
    # some of them will be needed during inference.
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # Initialize the runtime needed for loading the TensorRT engine from file.
    self.trt_runtime = trt.Runtime(TRT_LOGGER)

    # TRT engine placeholder.
    self.trt_engine = None

    # Display the requested engine settings on stdout.
    print("TensorRT inference engine settings:")
    print(" * Inference precision - {}".format(trt_engine_datatype))
    print(" * Max batch size - {}\n".format(batch_size))

    # Load the serialized engine from file.
    if not self.trt_engine:
        print("Loading cached TensorRT engine from {}".format(trt_engine_path))
        self.trt_engine = engine_utils.load_engine(self.trt_runtime,
                                                   trt_engine_path)

    # Allocate memory for network inputs/outputs on both CPU and GPU.
    self.inputs, self.outputs, self.bindings, self.stream = \
        engine_utils.allocate_buffers(self.trt_engine)

    # An execution context is needed for inference.
    self.context = self.trt_engine.create_execution_context()

    # Pre-allocate a host array for repeated use (e.g. multi-batch inference).
    input_volume = trt.volume((3, 300, 300))
    self.numpy_array = np.zeros(
        (self.trt_engine.max_batch_size, input_volume))
def sub_mean_chw(data):
    data = data.transpose((1, 2, 0))  # CHW -> HWC
    data -= np.array(MEAN)            # Broadcast mean subtraction
    data = data.transpose((2, 0, 1))  # HWC -> CHW
    return data


def rescale_image(image, output_shape, order=1):
    image = skimage.transform.resize(image, output_shape, order=order,
                                     preserve_range=True, mode='reflect')
    return image


import engine as eng
import inference as inf
import tensorrt as trt
import cv2

input_file_path = "data/yolact_example_0.png"
serialized_plan_fp32 = "my_engine.trt"
HEIGHT = 550
WIDTH = 550

# Read and resize the input image to the network resolution.
img = cv2.imread(input_file_path)
print(img.shape)
dim = (WIDTH, HEIGHT)
img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
print(img.shape)

# Deserialize the engine and run inference on the resized image.
engine = eng.load_engine(trt_runtime, serialized_plan_fp32)
h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(
    engine, 1, trt.float32)
out = inf.do_inference(engine, img, h_input, d_input, h_output, d_output,
                       stream, 1, HEIGHT, WIDTH)
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)


def load_data(path):
    # Resize, center-crop, and convert the image into an NCHW float array.
    trans = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])
    img = Image.open(path)
    img_tensor = trans(img).unsqueeze(0)
    return np.array(img_tensor)


# Load the serialized TRT engine
load_trt = timer("Load TRT Engine")
trt_path = 'alexnet.trt'
engine = load_engine(trt_runtime, trt_path)
load_trt.end()

# Allocate host/device buffers
inputs, outputs, bindings, stream = common.allocate_buffers(engine)

# Load data into the host input buffer
inputs[0].host = load_data('../test_photo.jpg')

# Run inference
infer_trt = timer("TRT Inference")
with engine.create_execution_context() as context:
    trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs,
                                      outputs=outputs, stream=stream)
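# Assumed helpers (not shown here): sketches of `common.allocate_buffers` and
# `common.do_inference` in the style of NVIDIA's TensorRT Python samples, which
# the two snippets above (`common.*` and `engine_utils.*`) appear to follow.
# Only the call signatures are taken from the code above; the bodies are assumptions.
import pycuda.driver as cuda
import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
import tensorrt as trt


class HostDeviceMem:
    # Pairs a page-locked host buffer with its matching device allocation.
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem


def allocate_buffers(engine):
    # One HostDeviceMem per binding, plus the raw device pointers and a stream.
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream


def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Host -> device copies, async execution, device -> host copies, then sync.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    return [out.host for out in outputs]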
faces_embeddings = normalize_vectors(faces_embeddings)

detector = MTCNN()
face_array = extract_face_from_image(input_file_path, detector)
face_pixels = face_array

# scale pixel values
face_pixels = face_pixels.astype('float32')

# standardize pixel values across channels (global)
mean, std = face_pixels.mean(), face_pixels.std()
face_pixels = (face_pixels - mean) / std

# transform face into one sample
samples = np.expand_dims(face_pixels, axis=0)

# make prediction to get the embedding
engine = eng.load_engine(trt_runtime, engine_path)
h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(
    engine, 1, trt.float32)
yhat = inf.do_inference(engine, samples, h_input, d_input, h_output, d_output,
                        stream, 1, HEIGHT, WIDTH)
print(yhat.shape)

face_to_predict_embedding = normalize_vectors(yhat)
result = predict_using_min_l2_distance(faces_embeddings, labels,
                                       face_to_predict_embedding)
print('Predicted name: %s' % (str(result).title()))
def main(args):
    print('Platform: {}'.format(platform.platform()))

    # Deserialize the engine directly from the plan file.
    trt_runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
    with open(args.trtmodel, "rb") as f:
        engine = trt_runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    input_shape = engine.get_binding_shape(0)
    output_shape = engine.get_binding_shape(1)
    print('input shape: {}'.format(input_shape))
    print('output shape: {}'.format(output_shape))

    images = [
        'testtrt/000000001761.jpg', 'testtrt/000000119088.jpg',
        'testtrt/000000139099.jpg', 'testtrt/000000143998.jpg',
        'testtrt/000000222235.jpg', 'testtrt/000000276707.jpg',
        'testtrt/000000386134.jpg', 'testtrt/000000428218.jpg',
        'testtrt/000000530854.jpg', 'testtrt/000000538067.jpg'
    ]

    engine = eng.load_engine(trt_runtime, args.trtmodel)
    # h_input, d_input, h_output, d_output, stream = inf.allocate_buffers(engine, 1, trt.float32)

    inputBuffer = np.zeros(input_shape)
    output = np.empty(output_shape[1], dtype=np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(1 * inputBuffer.nbytes)
    d_output = cuda.mem_alloc(1 * output.nbytes)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    def predict_batch(input_data, d_input, stream, bindings, output, d_output):
        # The result gets copied into `output`.
        # Transfer input_data to the device
        cuda.memcpy_htod_async(d_input, input_data, stream)
        # Execute the model
        context.execute_async(1, bindings, stream.handle, None)
        # Transfer predictions back
        cuda.memcpy_dtoh_async(output, d_output, stream)
        # Synchronize threads
        stream.synchronize()
        return output

    print("Load model and dependencies...")
    predict_batch(inputBuffer, d_input, stream, bindings, output, d_output)

    print("Begin inferences")
    dtSum = 0.0
    for image in images:
        img = cv2.imread(image, 0)
        img = resize_with_crop_or_pad(img, [input_shape[1], input_shape[2]])
        initial = datetime.now()
        predict_batch(img.astype(np.float32), d_input, stream, bindings,
                      output, d_output)
        prediction = np.argmax(output)
        dt = (datetime.now() - initial).total_seconds()
        dtSum += dt
        print("Prediction: {} dt {}".format(prediction, dt))

    print("Average time {}".format(dtSum / len(images)))
def main():
    # Set up arguments
    run_case = args.case
    num_images = int(args.num_img)
    model_name = args.model_name

    # Set up logging to file
    log_filename = "logs/output_tensorrt_{}_{}.log".format(model_name, run_case)
    os.makedirs(os.path.dirname(log_filename), exist_ok=True)
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M',
        filename=log_filename,
        filemode='w')
    logging.getLogger('tensorrt')
    logging.info("#### start model prediction ####")

    # Logger to capture errors, warnings, and other information during the
    # build and inference phases
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)

    # Check the model directory
    os.makedirs(os.path.dirname("model/"), exist_ok=True)

    # Check data availability
    logging.info("check if data is available")
    logging.info("check data")
    if not os.path.exists("data"):
        return logging.error(
            'data directory not found; run download_images.py to generate a sample dataset')

    batch_size = 1

    # Run inference with TensorRT: load a cached engine, or build one from ONNX.
    logging.info("check engine")
    engine_name = "model/" + model_name + "_" + run_case + ".plan"
    if os.path.exists(engine_name):
        engine = eng.load_engine(trt_runtime, engine_name)
    else:
        onnx_path = "model/" + model_name + "_" + run_case + ".onnx"
        if not os.path.exists(onnx_path):
            logging.info("convert_torch_to_onnx")
            convert_torch_to_onnx(model_name)
        logging.info("build_engine_from_onnx")
        engine = build_engine_from_onnx(onnx_path, engine_name, batch_size,
                                        TRT_LOGGER)

    logging.info("start inference")
    data_type = trt.float32

    logging.info("allocate_buffers")
    h_input_1, d_input_1, h_output, d_output, stream = allocate_buffers(
        engine, batch_size, data_type)

    # Predict images
    logging.info("do_inference")
    predict_images(num_images, engine, h_input_1, d_input_1, h_output,
                   d_output, stream)
    return
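# Assumed helpers (not shown here): minimal sketches of the 5-tuple style
# `allocate_buffers` / `do_inference` used by most snippets above (both directly
# and via `inf.*`). The signatures mirror the calls above; the bodies are
# assumptions based on the usual PyCUDA + TensorRT pattern (page-locked host
# buffers, device allocations, async copies on a CUDA stream).
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
import tensorrt as trt


def allocate_buffers(engine, batch_size, data_type):
    # Page-locked host buffers sized from the engine's first input and output
    # bindings, matching device allocations, and a stream for async transfers.
    h_input = cuda.pagelocked_empty(
        batch_size * trt.volume(engine.get_binding_shape(0)),
        dtype=trt.nptype(data_type))
    h_output = cuda.pagelocked_empty(
        batch_size * trt.volume(engine.get_binding_shape(1)),
        dtype=trt.nptype(data_type))
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream


def do_inference(engine, batch, h_input, d_input, h_output, d_output,
                 stream, batch_size, height=None, width=None):
    # Copy the preprocessed batch into the host buffer, push it to the GPU,
    # run the engine, and copy the result back to the host.
    np.copyto(h_input, np.asarray(batch).ravel())
    with engine.create_execution_context() as context:
        cuda.memcpy_htod_async(d_input, h_input, stream)
        context.execute(batch_size=batch_size,
                        bindings=[int(d_input), int(d_output)])
        cuda.memcpy_dtoh_async(h_output, d_output, stream)
        stream.synchronize()
    if height is not None and width is not None:
        return h_output.reshape((batch_size, -1, height, width))
    return h_output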