def init_inference():
    """Create the DQN network and store it in the module-level ``model``.

    Behaviour is driven by the module-level ``args``:

    * ``args.trt_module`` falsy: load ``args.pretrained_model`` into the
      network and move it to the module-level ``device``.
    * ``args.trt_module`` and ``args.trt_conversion`` truthy: load the
      pretrained weights, convert the network with torch2trt (FP16,
      ``max_batch_size=100``) using a ``(1, 3, 120, 320)`` sample input,
      save the converted state dict to ``args.trt_model`` and call
      ``exit()``.
    * ``args.trt_module`` truthy only: load ``args.trt_model`` into a
      ``TRTModule`` and move it to ``device``.
    """
    global model
    global device

    model = DQN(120, 320, DISCRETIZATION)
    model.eval()

    if not args.trt_module:
        # Plain PyTorch inference path.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.to(device)
        return

    from torch2trt import TRTModule

    if args.trt_conversion:
        # One-off conversion run: build the engine, persist it, then quit.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.cuda()
        sample_input = torch.ones((1, 3, 120, 320)).cuda()
        from torch2trt import torch2trt
        converted = torch2trt(
            model, [sample_input], max_batch_size=100, fp16_mode=True
        )
        torch.save(converted.state_dict(), args.trt_model)
        exit()

    engine = TRTModule()
    engine.load_state_dict(torch.load(args.trt_model))
    model = engine.to(device)
def __init__(
    self,
    model,
    exp,
    cls_names=COCO_CLASSES,
    trt_file=None,
    decoder=None,
    device="cpu",
):
    """Set up the predictor from a model and an experiment description.

    Args:
        model: Network used for inference; replaced by a ``TRTModule`` when
            ``trt_file`` is given.
        exp: Object providing ``num_classes``, ``test_conf``, ``nmsthre``
            and ``test_size``.
        cls_names: Class names stored as ``self.cls_names``.
        trt_file: Optional path to a saved ``TRTModule`` state dict.
        decoder: Stored as ``self.decoder``.
        device: Stored as ``self.device``.
    """
    self.model = model
    self.cls_names = cls_names
    self.decoder = decoder
    self.device = device

    # Inference settings copied from the experiment object.
    self.num_classes = exp.num_classes
    self.confthre = exp.test_conf
    self.nmsthre = exp.nmsthre
    self.test_size = exp.test_size

    if trt_file is not None:
        from torch2trt import TRTModule

        engine = TRTModule()
        engine.load_state_dict(torch.load(trt_file))

        # NOTE(review): the original PyTorch model is run once on a dummy
        # CUDA input before being swapped out; presumably this initialises
        # state that later decoding depends on - confirm before removing.
        dummy = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
        self.model(dummy)
        self.model = engine

    # Per-channel normalisation constants.
    self.rgb_means = (0.485, 0.456, 0.406)
    self.std = (0.229, 0.224, 0.225)
def export_siamfcpp_track_fea_trt(task_cfg, parsed_args):
    """Export phase "freeze_track_fea" (basemodel/c_x/r_x) to a trt model.

    Builds the "track" model from ``task_cfg.model``, converts it with
    torch2trt on a random ``(1, 3, 303, 303)`` CUDA input, saves the
    converted state dict to ``parsed_args.output + "_track_fea.trt"``, then
    reloads it and checks that its first two outputs match the PyTorch
    outputs (``rtol=1e-03``, ``atol=1e-05``).
    """
    model = model_builder.build("track", task_cfg.model)
    model.eval().cuda()
    model.phase = "freeze_track_fea"

    search_im = torch.randn(1, 3, 303, 303).cuda()
    # Reference outputs from the PyTorch model for the later comparison.
    fea = model(search_im)

    output_path = parsed_args.output + "_track_fea.trt"
    logger.info("start cvt pytorch model")
    converted = torch2trt(model, [search_im])
    torch.save(converted.state_dict(), output_path)
    logger.info("save trt model to {}".format(output_path))

    # Round-trip through disk so the saved file itself is what gets verified.
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(output_path))
    trt_outs = reloaded(search_im)

    for index in (0, 1):
        np.testing.assert_allclose(
            to_numpy(fea[index]),
            to_numpy(trt_outs[index]),
            rtol=1e-03,
            atol=1e-05,
        )
    logger.info("test accuracy ok")
def load_trt_model(model_path):
    """Return a ``TRTModule`` populated from the state dict at ``model_path``.

    Args:
        model_path: Path passed to ``torch.load``.

    Returns:
        The loaded ``TRTModule`` instance.
    """
    from torch2trt import TRTModule

    print("Loading TensorRT optimized model")
    trt_module = TRTModule()
    saved_state = torch.load(model_path)
    trt_module.load_state_dict(saved_state)
    return trt_module
def __init__(self, model, exp, trt_file=None, decoder=None,
             device=torch.device("cpu"), fp16=False):
    """Set up the predictor from a model and an experiment description.

    Args:
        model: Network used for inference; replaced by a ``TRTModule`` when
            ``trt_file`` is given.
        exp: Object providing ``num_classes``, ``test_conf``, ``nmsthre``
            and ``test_size``.
        trt_file: Optional path to a saved ``TRTModule`` state dict.
        decoder: Stored as ``self.decoder``.
        device: Stored as ``self.device``; also used for the dummy input.
        fp16: Stored as ``self.fp16``.
    """
    self.model = model
    self.decoder = decoder
    self.device = device
    self.fp16 = fp16

    # Inference settings copied from the experiment object.
    self.num_classes = exp.num_classes
    self.confthre = exp.test_conf
    self.nmsthre = exp.nmsthre
    self.test_size = exp.test_size

    if trt_file is not None:
        from torch2trt import TRTModule

        engine = TRTModule()
        engine.load_state_dict(torch.load(trt_file))

        # NOTE(review): the original PyTorch model is run once on a dummy
        # input before being swapped out; presumably this initialises state
        # that later decoding depends on - confirm before removing.
        dummy_shape = (1, 3, exp.test_size[0], exp.test_size[1])
        dummy = torch.ones(dummy_shape, device=device)
        self.model(dummy)
        self.model = engine

    # Per-channel normalisation constants.
    self.rgb_means = (0.485, 0.456, 0.406)
    self.std = (0.229, 0.224, 0.225)
def __init__(
    self,
    model,
    exp,
    cls_names=COCO_CLASSES,
    trt_file=None,
    decoder=None,
    device="cpu",
    fp16=False,
    legacy=False,
):
    """Set up the predictor from a model and an experiment description.

    Args:
        model: Network used for inference; replaced by a ``TRTModule`` when
            ``trt_file`` is given.
        exp: Object providing ``num_classes``, ``test_conf``, ``nmsthre``
            and ``test_size``.
        cls_names: Class names stored as ``self.cls_names``.
        trt_file: Optional path to a saved ``TRTModule`` state dict.
        decoder: Stored as ``self.decoder``.
        device: Stored as ``self.device``.
        fp16: Stored as ``self.fp16``.
        legacy: Forwarded to ``ValTransform`` when building ``self.preproc``.
    """
    self.model = model
    self.cls_names = cls_names
    self.decoder = decoder
    self.device = device
    self.fp16 = fp16

    # Inference settings copied from the experiment object.
    self.num_classes = exp.num_classes
    self.confthre = exp.test_conf
    self.nmsthre = exp.nmsthre
    self.test_size = exp.test_size

    self.preproc = ValTransform(legacy=legacy)

    if trt_file is None:
        return

    from torch2trt import TRTModule

    engine = TRTModule()
    engine.load_state_dict(torch.load(trt_file))

    # NOTE(review): the original PyTorch model is run once on a dummy CUDA
    # input before being swapped out; presumably this initialises state that
    # later decoding depends on - confirm before removing.
    dummy = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
    self.model(dummy)
    self.model = engine
def init_inference():
    """Create the network named by ``args.model`` and store it in ``model``.

    Supported ``args.model`` values are ``'resnet18'`` (with a 512 -> 3
    linear head), ``'samplenet'`` and ``'simplenet'``; anything else raises
    ``NotImplementedError``.

    Loading is driven by the module-level ``args``:

    * ``args.trt_module`` falsy: load ``args.pretrained_model`` and move the
      network to the module-level ``device``.
    * ``args.trt_module`` and ``args.trt_conversion`` truthy: load the
      pretrained weights, convert with torch2trt (FP16,
      ``max_batch_size=100``) using a ``(1, 3, 240, 320)`` sample input,
      save the converted state dict to ``args.trt_model`` and call
      ``exit()``.
    * ``args.trt_module`` truthy only: load ``args.trt_model`` into a
      ``TRTModule`` and move it to ``device``.
    """
    global model
    global device

    architecture = args.model
    if architecture == 'resnet18':
        model = models.resnet18()
        model.fc = torch.nn.Linear(512, 3)
    elif architecture == 'samplenet':
        model = SampleNet()
    elif architecture == 'simplenet':
        model = SimpleNet()
    else:
        raise NotImplementedError()
    model.eval()

    if not args.trt_module:
        # Plain PyTorch inference path.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.to(device)
        return

    from torch2trt import TRTModule

    if args.trt_conversion:
        # One-off conversion run: build the engine, persist it, then quit.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.cuda()
        sample_input = torch.ones((1, 3, 240, 320)).cuda()
        from torch2trt import torch2trt
        converted = torch2trt(
            model, [sample_input], max_batch_size=100, fp16_mode=True
        )
        torch.save(converted.state_dict(), args.trt_model)
        exit()

    engine = TRTModule()
    engine.load_state_dict(torch.load(args.trt_model))
    model = engine.to(device)
class ResDownS(nn.Module):
    """1x1 convolution + batch norm adapter with optional TensorRT variants.

    The same ``nn.Sequential`` is initially registered under three names
    (``downsample``, ``downsample_15``, ``downsample_31``). ``init_trt`` can
    replace the two size-specific attributes with TensorRT modules, and
    ``forward`` dispatches on the width of its input.
    """

    def __init__(self, inplane, outplane):
        """Build the adapter mapping ``inplane`` to ``outplane`` channels."""
        super().__init__()
        adapter = nn.Sequential(
            nn.Conv2d(inplane, outplane, kernel_size=1, bias=False),
            nn.BatchNorm2d(outplane),
        )
        self.downsample = adapter
        # Until init_trt() runs, the size-specific paths share the adapter.
        self.downsample_15 = adapter
        self.downsample_31 = adapter

    def init_trt(self, fp16_mode, trt_weights_path):
        """Build or load the TensorRT modules for widths 15 and 31.

        If ``<trt_weights_path>/downsample_15_trt.pth`` exists, both modules
        are loaded from disk; otherwise they are converted with torch2trt
        from ``self.downsample`` on ``(1, 1024, 15, 15)`` and
        ``(1, 1024, 31, 31)`` sample inputs and saved.

        Args:
            fp16_mode: Forwarded to ``torch2trt`` as ``fp16_mode``.
            trt_weights_path: Directory holding the ``*_trt.pth`` files.
        """
        file_15 = trt_weights_path + '/downsample_15_trt.pth'
        file_31 = trt_weights_path + '/downsample_31_trt.pth'

        if path.exists(file_15):
            self.downsample_15 = TRTModule()
            self.downsample_15.load_state_dict(torch.load(file_15))
            self.downsample_31 = TRTModule()
            self.downsample_31.load_state_dict(torch.load(file_31))
            return

        sample_15 = torch.ones((1, 1024, 15, 15)).cuda()
        sample_31 = torch.ones((1, 1024, 31, 31)).cuda()
        self.downsample_15 = torch2trt(
            self.downsample, [sample_15], fp16_mode=fp16_mode)
        self.downsample_31 = torch2trt(
            self.downsample, [sample_31], fp16_mode=fp16_mode)
        torch.save(self.downsample_15.state_dict(), file_15)
        torch.save(self.downsample_31.state_dict(), file_31)

    def forward(self, x):
        """Apply the adapter and centre-crop outputs narrower than 20.

        Inputs whose last dimension is 15 or 31 use the matching
        size-specific module; everything else uses ``self.downsample``.
        Outputs with ``size(3) < 20`` lose 4 rows/columns on each border.
        """
        width = x.shape[-1]
        if width == 15:
            branch = self.downsample_15
        elif width == 31:
            branch = self.downsample_31
        else:
            branch = self.downsample
        x = branch(x)

        if x.size(3) < 20:
            x = x[:, :, 4:-4, 4:-4]
        return x
def build_tensorrt(trt_file, model, size, device, recompile=False, fp16=True):
    """Return a TensorRT version of ``model``, loading it from disk if possible.

    A dummy input of shape ``(1, 3, size[1], size[0])`` is created on
    ``device``. If ``trt_file`` exists and ``recompile`` is false, the saved
    module is loaded, run once on the dummy input and returned. Otherwise
    ``model`` is converted with torch2trt, the result is saved to
    ``trt_file`` and returned.

    Args:
        trt_file: Path of the saved ``TRTModule`` state dict.
        model: PyTorch model to convert when no usable file exists.
        size: Two-element sequence used to size the dummy input.
        device: Device the dummy input is moved to.
        recompile: When true, ignore any existing ``trt_file``.
        fp16: Forwarded to ``torch2trt`` as ``fp16_mode``.
    """
    from torch2trt import torch2trt, TRTModule
    import tensorrt as trt

    dummy = torch.ones(1, 3, int(size[1]), int(size[0])).to(device)

    if path.isfile(trt_file) and not recompile:
        print("Found TensorRT model file, loading...")
        cached = TRTModule()
        cached.load_state_dict(torch.load(trt_file))
        # Run once so a broken engine fails here rather than at first use.
        cached(dummy)
        return cached

    print("Compiling with tensorRT...")
    compiled = torch2trt(
        model,
        [dummy],
        max_workspace_size=1 << 27,
        fp16_mode=fp16,
        log_level=trt.Logger.INFO,
        strict_type_constraints=True,
        max_batch_size=1,
    )
    torch.save(compiled.state_dict(), trt_file)
    return compiled
def process_images(images: list, trt: bool):
    """Classify each image, stamp the label on it and save it to ./output/.

    Prints the model load time, the total processing time and the current
    and peak CUDA memory allocation.

    Args:
        images: Images to process; each must expose ``height``, ``width``,
            ``filename`` and ``save``.
        trt: When true, load the TensorRT module from ``alexnet_trt.pth``;
            otherwise use the pretrained AlexNet on CUDA.
    """
    started = time.time()
    if trt:
        # 'alexnet_trt.pth' must already exist, e.g. produced by converting
        # pretrained AlexNet with torch2trt on a (1, 3, 224, 224) CUDA input
        # and saving the converted module's state_dict.
        model = TRTModule()
        model.load_state_dict(torch.load('alexnet_trt.pth'))
    else:
        model = alexnet(pretrained=True).eval().cuda()
    print("Model load time {}".format(time.time() - started))

    started = time.time()
    for image in images:
        index = classify_image(image, model)
        caption = str(index) + ': ' + classes[index]

        canvas = ImageDraw.Draw(image)
        # White 20-pixel strip along the bottom edge as caption background.
        canvas.rectangle(
            (0, image.height - 20, image.width, image.height),
            fill=(255, 255, 255),
        )
        canvas.text(
            (50, image.height - 15),
            caption,
            (0, 0, 0),
            font=ImageFont.load_default(),
        )
        image.save('./output/' + image.filename.split('/')[-1])
    print("Image(s) processing time {}".format(time.time() - started))

    print('Memory allocated: ' + str(torch.cuda.memory_allocated()))
    print('Max memory allocated: ' + str(torch.cuda.max_memory_allocated()))
class AntiSpoofPredict(Detection):
    """Anti-spoofing classifier that runs through a cached TensorRT module.

    If ``trt_spoof.pth`` exists in the working directory it is loaded
    directly. Otherwise the PyTorch model is loaded from ``weights_path``
    and converted to TensorRT (FP16) on the first ``predict`` call, after
    which the converted module is saved to ``trt_spoof.pth``.
    """

    def __init__(self, device_id, weights_path):
        """Select the device and load either the cached engine or the model.

        Args:
            device_id: CUDA device index used when CUDA is available.
            weights_path: Path of the PyTorch checkpoint; its base name is
                parsed by ``parse_model_name``.
        """
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device(
            "cuda:{}".format(device_id) if torch.cuda.is_available() else "cpu")
        self.model_trt = None
        self._load_model(weights_path)

    def _load_model(self, model_path):
        """Load the cached TensorRT module, or else the PyTorch model.

        When the cached engine is used, ``self.model`` is not set by this
        method.
        """
        if os.path.isfile('trt_spoof.pth'):
            self.model_trt = TRTModule()
            self.model_trt.load_state_dict(torch.load('trt_spoof.pth'))
            return None

        # define model
        model_name = os.path.basename(model_path)
        h_input, w_input, model_type, _ = parse_model_name(model_name)
        self.kernel_size = get_kernel(h_input, w_input)
        self.model = MODEL_MAPPING[model_type](
            conv6_kernel=self.kernel_size).to(self.device)

        # load model weight
        state_dict = torch.load(model_path, map_location=self.device)
        prefix = 'module.'
        first_layer_name = next(iter(state_dict))
        if first_layer_name.startswith(prefix):
            # Strip the prefix only where it is actually present, instead of
            # blindly dropping the first seven characters of every key.
            from collections import OrderedDict
            stripped = OrderedDict()
            for key, value in state_dict.items():
                if key.startswith(prefix):
                    key = key[len(prefix):]
                stripped[key] = value
            state_dict = stripped
        self.model.load_state_dict(state_dict)
        self.model.eval()
        return None

    def predict(self, img):
        """Return softmax scores for ``img`` as a numpy array.

        On the first call without a cached engine, the PyTorch model is
        converted with torch2trt using this very input as the sample, saved
        to ``trt_spoof.pth``, and the PyTorch model reference is dropped.
        """
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)

        if self.model_trt is None:
            self.model_trt = torch2trt(self.model, [img], fp16_mode=True)
            torch.save(self.model_trt.state_dict(), 'trt_spoof.pth')
            self.model = None

        with torch.no_grad():
            result = self.model_trt(img)
            # dim=1 is what the deprecated implicit-dim call resolves to for
            # 2-D and 4-D tensors; stating it explicitly removes the warning.
            # NOTE(review): assumes the model output is (batch, classes) -
            # confirm.
            result = F.softmax(result, dim=1).cpu().numpy()
        return result
def __init__(self, modelFile, taskDescFile, csv=0, csvPath='.'):
    """Load the pose model (as a TensorRT module) and its helpers.

    The TensorRT file name is derived from the base name of ``modelFile``
    with ``_trt.pth`` appended, relative to the working directory. If it
    exists it is loaded; otherwise the base model weights are loaded from
    ``modelFile``, converted with torch2trt (FP16) and saved under that
    name.

    Args:
        modelFile: Path of the PyTorch checkpoint; its base name selects
            the model function and the input size.
        taskDescFile: Path of the JSON task description.
        csv: CSV output is initialised when this is greater than 0.
        csvPath: Passed to ``_initCsv``.

    Raises:
        PoseCaptureDescError: If reading ``taskDescFile`` raises ``OSError``.
        PoseCaptureModelError: If the model name cannot be parsed or names
            a function missing from ``trt_pose.models``.
        PoseCaptureCsvError: If ``_initCsv`` raises ``OSError``.
    """
    # Load the task description
    try:
        with open(taskDescFile, 'r') as f:
            human_pose = json.load(f)
    except OSError as err:
        raise PoseCaptureDescError from err
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])

    # Load the base model
    fbase = os.path.basename(modelFile)
    func, self.inWidth, self.inHeight = \
        PoseCaptureModel.getModelFuncName(fbase)
    if func is None:
        logging.fatal('Invalid model name: %s' % (fbase))
        logging.fatal('Model name should be (.+_.+_att)_(\\d+)x(\\d+)_')
        raise PoseCaptureModelError('Invalid model name: %s' % (fbase))
    if not hasattr(trt_pose.models, func):
        logging.fatal('Could not find base model function: %s' % (func))
        raise PoseCaptureModelError(
            'Could not find base model function: %s' % (func))

    # Resolve the constructor by attribute lookup rather than eval(): the
    # name comes from a file name and was just validated with hasattr().
    model_factory = getattr(trt_pose.models, func)
    trtFile = os.path.splitext(fbase)[0] + '_trt.pth'
    logging.info('Loading base model from %s' % ('trt_pose.models.' + func))
    # NOTE(review): the base model is built even when the TensorRT file
    # already exists and the model is then unused - kept as in the original.
    model = model_factory(num_parts, 2 * num_links).cuda().eval()

    if os.path.exists(trtFile):
        logging.info('Loading model from TensorRT plan file ...')
        model_trt = TRTModule()
        model_trt.load_state_dict(torch.load(trtFile))
    else:
        logging.info('Optimizing model for TensorRT ...')
        model.load_state_dict(torch.load(modelFile))
        data = torch.zeros((1, 3, self.inHeight, self.inWidth)).cuda()
        model_trt = torch2trt.torch2trt(
            model, [data], fp16_mode=True, max_workspace_size=1 << 25)
        torch.save(model_trt.state_dict(), trtFile)

    # Per-channel normalisation constants, kept on the GPU.
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    self.parse_objects = ParseObjects(topology)
    self.draw_objects = DrawObjects(topology)
    self.model_trt = model_trt
    self.num_parts = num_parts

    self.csv = csv
    self.count = 0
    if self.csv > 0:
        try:
            self._initCsv(human_pose['keypoints'], csvPath)
        except OSError as err:
            raise PoseCaptureCsvError from err
class ResDown(MultiStageFeature):
    """ResNet-50 feature extractor followed by a ``ResDownS`` adapter.

    ``features_127`` and ``features_255`` start out as aliases of
    ``features``; ``init_trt`` can replace them with TensorRT modules built
    for 127x127 and 255x255 inputs respectively.
    """

    def __init__(self, pretrain=False):
        """Build the backbone and adapter.

        Args:
            pretrain: When true, ``load_pretrain`` is called on the backbone
                with ``'resnet.model'``.
        """
        super().__init__()
        backbone = resnet50(layer3=True, layer4=False)
        self.features = backbone
        # Until init_trt() runs, both size-specific paths share the backbone.
        self.features_127 = backbone
        self.features_255 = backbone
        if pretrain:
            load_pretrain(self.features, 'resnet.model')

        self.downsample = ResDownS(1024, 256)

        self.layers = [
            self.downsample,
            self.features.layer2,
            self.features.layer3,
        ]
        self.train_nums = [1, 3]
        self.change_point = [0, 0.5]

        self.unfix(0.0)

    def init_trt(self, fp16_mode, trt_weights_path):
        """Build or load the TensorRT backbones, then do the same for the adapter.

        If ``<trt_weights_path>/features_127_trt.pth`` exists, both backbone
        modules are loaded from disk; otherwise they are converted with
        torch2trt from ``self.features`` on ``(1, 3, 127, 127)`` and
        ``(1, 3, 255, 255)`` sample inputs and saved.

        Args:
            fp16_mode: Forwarded to ``torch2trt`` as ``fp16_mode``.
            trt_weights_path: Directory holding the ``*_trt.pth`` files.
        """
        file_127 = trt_weights_path + '/features_127_trt.pth'
        file_255 = trt_weights_path + '/features_255_trt.pth'

        if path.exists(file_127):
            self.features_127 = TRTModule()
            self.features_255 = TRTModule()
            self.features_127.load_state_dict(torch.load(file_127))
            self.features_255.load_state_dict(torch.load(file_255))
        else:
            sample_127 = torch.ones((1, 3, 127, 127)).cuda()
            sample_255 = torch.ones((1, 3, 255, 255)).cuda()
            self.features_127 = torch2trt(
                self.features, [sample_127], fp16_mode=fp16_mode)
            self.features_255 = torch2trt(
                self.features, [sample_255], fp16_mode=fp16_mode)
            torch.save(self.features_127.state_dict(), file_127)
            torch.save(self.features_255.state_dict(), file_255)

        self.downsample.init_trt(fp16_mode, trt_weights_path)

    def param_groups(self, start_lr, feature_mult=1):
        """Return optimizer parameter groups for the adapter and backbone.

        The base rate is ``start_lr * feature_mult``; the backbone group uses
        a tenth of it. Modules without trainable parameters add no group.
        """
        base_lr = start_lr * feature_mult

        def _group_for(module, mult=1):
            trainable = [p for p in module.parameters() if p.requires_grad]
            if not trainable:
                return []
            return [{'params': trainable, 'lr': base_lr * mult}]

        return _group_for(self.downsample) + _group_for(self.features, 0.1)

    def forward(self, x):
        """Return the adapted last feature map from the 127 path."""
        feature_maps = self.features_127(x)
        return self.downsample(feature_maps[-1])

    def forward_all(self, x):
        """Return all 255-path feature maps and the adapted last one."""
        feature_maps = self.features_255(x)
        adapted = self.downsample(feature_maps[-1])
        return feature_maps, adapted
class run:
    """Webcam pose-estimation demo driven by a pre-built TensorRT module.

    ``__init__`` parses ``--model``, loads the matching ``*_trt.pth`` file
    and opens camera 0; ``run`` processes up to 500 frames and shows them.
    """

    def __init__(self):
        """Parse arguments, load the TensorRT module and open the camera."""
        self.parser = argparse.ArgumentParser(
            description='TensorRT pose estimation run')
        self.parser.add_argument(
            '--model', type=str, default='resnet', help='resnet or densenet')
        # Use the parser created above; the bare name `parser` was never
        # defined in this scope.
        self.args = self.parser.parse_args()

        with open('human_pose.json', 'r') as f:
            human_pose = json.load(f)
        self.topology = trt_pose.coco.coco_category_to_topology(human_pose)

        if 'resnet' in self.args.model:
            print('------ model = resnet--------')
            optimized_model = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
            self.width = 224
            self.height = 224
        else:
            print('------ model = densenet--------')
            optimized_model = 'densenet121_baseline_att_256x256_B_epoch_160_trt.pth'
            self.width = 256
            self.height = 256

        # Only the converted module is used for inference, so the base
        # PyTorch model is no longer constructed here.
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load(optimized_model))

        # Per-channel normalisation constants, kept on the GPU.
        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()

        # Built once here instead of on every frame.
        self.parse_objects = ParseObjects(self.topology)
        self.draw_objects = DrawObjects(self.topology)

        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    def run(self):
        """Process camera frames until 500 frames, a read error or 'q'.

        Each frame is resized to the model input size, handed to
        ``PE.execute`` together with the original frame and the capture
        time, and the returned image is displayed. The windows, the video
        writer and the camera are released even if processing raises.
        """
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        # NOTE(review): no frame is ever written to out_video, so the file
        # stays empty - confirm whether the result should be written.
        out_video = cv2.VideoWriter(
            '/tmp/output.mp4', fourcc,
            self.cap.get(cv2.CAP_PROP_FPS), (640, 480))
        count = 0
        try:
            while self.cap.isOpened() and count < 500:
                t = time.time()
                ret_val, dst = self.cap.read()
                if not ret_val:
                    print("Camera read Error")
                    break

                img = cv2.resize(
                    dst, dsize=(self.width, self.height),
                    interpolation=cv2.INTER_AREA)
                # NOTE(review): `PE` is not defined in this class; presumably
                # a module-level object - confirm.
                img = PE.execute(img, dst, t)

                cv2.imshow("result", img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                count += 1
        finally:
            cv2.destroyAllWindows()
            out_video.release()
            self.cap.release()
def load_model():
    """Load AlexNet and its TensorRT counterpart from the working directory.

    Weights come from ``alexnet.pth`` and ``alexnet_trt.pth``. The load is
    bracketed by a ``log`` object that is ended once both are ready.

    Returns:
        A ``(model, model_trt)`` tuple.
    """
    model_log = log('Load alexnet & tensorrt ... ')

    torch_model = alexnet().eval().cuda()
    torch_model.load_state_dict(torch.load('alexnet.pth'))

    trt_model = TRTModule()
    trt_model.load_state_dict(torch.load('alexnet_trt.pth'))

    model_log.end()
    return (torch_model, trt_model)
def demo_with_torch2trt(trt_file_path, data_root):
    """Run the TensorRT lane model on one image ten times and draw the lanes.

    Each iteration reads the image at ``data_root``, runs the model, prints
    the inference time, draws the detected points on the image, shows it
    and writes it to ``demo_using_torch2trt.jpg``. Pressing 'q' stops early.

    Args:
        trt_file_path: Path of the saved ``TRTModule`` state dict.
        data_root: Path of the image passed to ``cv2.imread``.
    """
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(trt_file_path))

    row_anchor = tusimple_row_anchor
    img_w, img_h = 1280, 720

    # NOTE(review): this transform is built but never applied; the loop uses
    # `preprocessing` instead.
    img_transforms = transforms.Compose([
        transforms.Resize((288, 800)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    for _ in range(10):
        if cv2.waitKey(1) == ord("q"):
            break

        img_ori = cv2.imread(data_root)
        img = preprocessing(img_ori).unsqueeze(0).cuda()

        t1 = time.time()
        with torch.no_grad():
            out = model_trt(img)
        col_sample = np.linspace(0, 800 - 1, 100)
        col_sample_w = col_sample[1] - col_sample[0]
        out_j = out[0].data.cpu().numpy()
        t2 = time.time()
        print("Inference time = %.3f ms" % ((t2 - t1) * 1000))

        # Flip the second axis, then take the expected location over all
        # but the last entry of the first axis.
        out_j = out_j[:, ::-1, :]
        prob = scipy.special.softmax(out_j[:-1, :, :], axis=0)
        idx = (np.arange(100) + 1).reshape(-1, 1, 1)
        loc = np.sum(prob * idx, axis=0)
        # Positions whose arg-max is index 100 are zeroed out.
        out_j = np.argmax(out_j, axis=0)
        loc[out_j == 100] = 0
        out_j = loc

        n_rows, n_lanes = out_j.shape[0], out_j.shape[1]
        for lane in range(n_lanes):
            # Skip columns with two or fewer non-zero entries.
            if np.sum(out_j[:, lane] != 0) <= 2:
                continue
            for row in range(n_rows):
                if out_j[row, lane] > 0:
                    point = (
                        int(out_j[row, lane] * col_sample_w * img_w / 800) - 1,
                        int(img_h * (row_anchor[56 - 1 - row] / 288)) - 1,
                    )
                    cv2.circle(img_ori, point, img_w // 300, (0, 255, 0), 2)

        cv2.imshow("result", img_ori)
        cv2.imwrite("demo_using_torch2trt.jpg", img_ori)
    cv2.destroyAllWindows()
def ETRI_Initialization(path):
    """Load the skeleton topology and the four TensorRT modules.

    Args:
        path: Directory containing the ``*.pth`` TensorRT state dicts.

    Returns:
        ``(topology, parse_objects, model_skeleton, model_trt_ba,
        model_trt_ha, model_trt_hp)``.
    """
    def _load_engine(banner, filename):
        # Announce, then load one TRTModule from `path`.
        print(banner)
        engine = TRTModule()
        engine.load_state_dict(torch.load(os.path.join(path, filename)))
        return engine

    # Load & Init for Skeletons
    with open('./utils/human_pose.json', 'r') as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    parse_objects = ParseObjects(topology)

    model_skeleton = _load_engine(
        "trtPose start",
        'resnet18_baseline_att_224x224_A_epoch_249_trt_2.pth')
    model_trt_ba = _load_engine("body action start", 'bodyaction_TRT.pth')
    model_trt_ha = _load_engine("hand action start", 'handaction_jc_c_TRT.pth')
    model_trt_hp = _load_engine("headpose start", 'headpose_TRT.pth')

    return (topology, parse_objects, model_skeleton,
            model_trt_ba, model_trt_ha, model_trt_hp)
def process_tftrt(self, input_model, output_infer_model):
    """Return a TensorRT module, converting and saving it if needed.

    Args:
        input_model: Passed to ``load_pytorch_saved_model`` when a
            conversion is required.
        output_infer_model: Path of the saved ``TRTModule`` state dict.

    Returns:
        The loaded or freshly converted module.
    """
    if not os.path.exists(output_infer_model):
        # No saved engine yet: convert the pretrained model using a
        # (1, 3, 224, 224) CUDA sample input and persist the result.
        resnet50_model = load_pytorch_saved_model(input_model)
        sample = torch.ones((1, 3, 224, 224)).cuda()
        converted = torch2trt(resnet50_model, [sample])
        torch.save(converted.state_dict(), output_infer_model)
        return converted

    logging.info("resnet50_pytorch_trt.pth is exist")
    cached = TRTModule()
    cached.load_state_dict(torch.load(output_infer_model))
    return cached
class BackendTensorRT:
    """Inference backend that runs a torch2trt INT8/FP16 module."""

    # Maps ``args.calib_algo`` to the attribute name on
    # ``trt.CalibrationAlgoType``; resolved lazily so that only the chosen
    # algorithm has to exist in the installed TensorRT.
    _CALIB_ALGO_NAMES = {
        1: 'ENTROPY_CALIBRATION',
        2: 'ENTROPY_CALIBRATION_2',
        3: 'MINMAX_CALIBRATION',
    }

    def __init__(self):
        self.model = None

    def version(self):
        """Return the PyTorch version string."""
        return torch.__version__

    def name(self):
        """Return the backend identifier."""
        return "pytorch-tensorrt-ofa"

    def load(self, args, ds=None):
        """Load a cached TensorRT module or build one with INT8 calibration.

        The cache file is
        ``pretrained/<args.model>/bs<batch>_is<image>_<chip>_torch2trt.pth``.
        It is used when it exists and ``args.force_build`` is false;
        otherwise the model is built, calibrated on
        ``ds.get_calibration_set()`` and saved to that file.

        Args:
            args: Namespace providing ``batch_size``, ``image_size``,
                ``chip_name``, ``model``, ``force_build`` and, for a build,
                ``calib_algo`` (1, 2 or 3) and ``calib_batch_size``.
            ds: Dataset providing ``get_calibration_set()``; required only
                when a build is needed.

        Returns:
            ``self``.

        Raises:
            ValueError: If a build is needed and ``args.calib_algo`` is not
                1, 2 or 3, or ``ds`` is ``None``.
        """
        prefix = 'bs%d_is%d_%s_' % (
            args.batch_size, args.image_size, args.chip_name)
        lib_name = 'pretrained/' + args.model + '/' + prefix + 'torch2trt.pth'

        if os.path.exists(lib_name) and not args.force_build:
            self.model = TRTModule()
            self.model.load_state_dict(torch.load(lib_name))
            self.model.eval()
        else:
            # Validate before the expensive model load so bad arguments fail
            # fast with a clear message instead of an UnboundLocalError or
            # AttributeError further down.
            algo_name = self._CALIB_ALGO_NAMES.get(args.calib_algo)
            if algo_name is None:
                raise ValueError(
                    'calib_algo must be 1, 2 or 3, got %r'
                    % (args.calib_algo,))
            if ds is None:
                raise ValueError(
                    'a dataset is required to calibrate the TensorRT model')
            calib_algo = getattr(trt.CalibrationAlgoType, algo_name)

            net, _ = load_model(args)
            net = net.cuda()
            net.eval()
            input_data = torch.FloatTensor(
                np.array(ds.get_calibration_set(), np.float32)).cuda()

            # Workspace size is 2**33 bytes when the chip name contains
            # 'T4', 2**34 otherwise.
            size = 1 << (33 if 'T4' in args.chip_name else 34)
            self.model = torch2trt(
                net,
                [input_data],
                max_batch_size=args.batch_size,
                fp16_mode=True,
                max_workspace_size=size,
                int8_mode=True,
                int8_calib_algorithm=calib_algo,
                int8_calib_batch_size=args.calib_batch_size)
            torch.save(self.model.state_dict(), lib_name)

        log.info('model is ready')
        return self

    def predict(self, image):
        """Return the arg-max over dimension 1 of the model output."""
        with torch.no_grad():
            output = self.model(image)
            _, output = output.max(1)
        return output
class TRTPoseExtractor(BaseEstimator, TransformerMixin):
    """Transformer that turns images into pose keypoint feature vectors.

    Uses a TensorRT pose module loaded from ``model_path`` together with the
    topology described in ``./models/human_pose.json``.
    """

    def __init__(
        self,
        model_path='./models/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    ):
        """Load the topology, the TensorRT module and the helper objects.

        Args:
            model_path: Path of the saved ``TRTModule`` state dict.
        """
        self.model_path = model_path

        with open('./models/human_pose.json', 'r') as f:
            self.human_pose = json.load(f)
        self.topology = trt_pose.coco.coco_category_to_topology(
            self.human_pose)

        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load(self.model_path))

        # Per-channel normalisation constants, kept on the GPU.
        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
        self.device = torch.device('cuda')

        self.parse_objects = ParseObjects(self.topology)
        self.get_keypoints = GetKeypoints(self.topology)

    def preprocess(self, image):
        """Convert a BGR image into a normalised ``(1, C, 224, 224)`` tensor.

        The image is resized to 224x224, converted to RGB, turned into a
        CUDA tensor and normalised in place with ``self.mean``/``self.std``.
        """
        # NOTE(review): this rebinds the module-level `device` on every call
        # although `self.device` holds the same value - kept as is.
        global device
        device = torch.device('cuda')

        resized = cv2.resize(image, (224, 224))
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        tensor = transforms.functional.to_tensor(Image.fromarray(rgb)).to(device)
        tensor.sub_(self.mean[:, None, None]).div_(self.std[:, None, None])
        return tensor[None, ...]

    def fit(self, X, y=None):
        """No-op; returns ``self``."""
        return self

    def transform(self, X):
        """Return the squeezed array of keypoint feature vectors for ``X``.

        Args:
            X: Sequence of images, or of file paths when its first element
                is a string.
        """
        from_paths = isinstance(X[0], str)

        feat_array = []
        for row in X:
            # Read image and resize for model
            image = cv2.imread(row) if from_paths else row
            cmap, paf = self.model_trt(self.preprocess(image))
            cmap = cmap.detach().cpu()
            paf = paf.detach().cpu()
            counts, objects, peaks = self.parse_objects(cmap, paf)
            feat_array.append(
                self.get_keypoints(image, counts, objects, peaks))
        return np.array(feat_array).squeeze()
def doexecute():
    """Convert the road-following model to TensorRT and drive the car forever.

    Loads ``data/model.pth`` into a half-precision ResNet-18, converts it
    with torch2trt (FP16), saves and reloads it from
    ``road_following_model_trt.pth``, then loops: read a frame from camera
    0, run the model, and set steering from the first output with a fixed
    throttle of 0.5. The loop never terminates on its own.
    """
    print("start")
    CATEGORIES = ['apex']
    device = torch.device('cuda')

    net = torchvision.models.resnet18(pretrained=False)
    net.fc = torch.nn.Linear(512, 2 * len(CATEGORIES))
    net = net.cuda().eval().half()
    net.load_state_dict(torch.load('data/model.pth'))
    print("1")

    sample = torch.zeros((1, 3, 224, 224)).cuda().half()
    converted = torch2trt(net, [sample], fp16_mode=True)
    torch.save(converted.state_dict(), 'road_following_model_trt.pth')
    print("2")

    # Reload from disk so inference uses exactly what was saved.
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load('road_following_model_trt.pth'))
    print("model load end")

    car = NvidiaRacecar()
    # Left Camera
    camera0 = nano.Camera(device_id=0, flip=2, width=224, height=224, fps=60)
    # Right Camera (opened but not read in this loop)
    camera1 = nano.Camera(device_id=1, flip=2, width=224, height=224, fps=60)

    STEERING_GAIN = 0.75
    STEERING_BIAS = 0.00

    cnt = 0
    while True:
        frame = preprocess(camera0.read()).half()
        output = model_trt(frame).detach().cpu().numpy().flatten()
        x = float(output[0])
        car.steering = x * STEERING_GAIN + STEERING_BIAS
        car.throttle = 0.5
        print(str(cnt) + ":" + str(x) + ":")
        cnt += 1
def test_arcface():
    """Time ArcFace feature extraction and print a cosine distance.

    Loads the TensorRT module from a fixed path, extracts one feature from
    the first test image (printing the elapsed time and feature shape),
    then extracts the second image's feature ten times, printing the
    average time and the cosine distance between the two features.
    """
    model_weights = "/workspace/pretrained_models/torch/arcface_50_b1.pth"
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(model_weights))

    img0 = cv2.imread("/app/images/test/0.jpg")
    img1 = cv2.imread("/app/images/test/1.jpg")

    # Single, first-call extraction.
    started = time.time()
    feat0 = extract_feature(img0, model_trt)
    print("inf. time: ", time.time() - started)
    print(feat0.shape)

    # Averaged over repeated extractions.
    test_num = 10
    started = time.time()
    for _ in range(test_num):
        feat1 = extract_feature(img1, model_trt)
    print("inf. time: ", (time.time() - started) / test_num)

    dst = cosineDistance(feat0.cpu().numpy(), feat1.cpu().numpy())
    print("cos dst: ", dst)
def prepare():
    """Set up GPIO, cameras and the TensorRT model, then steer forever.

    Loads ``data/model.pth`` into a half-precision ResNet-18, converts it
    with torch2trt (FP16), saves and reloads it from
    ``road_following_model_trt.pth``, sets the throttle to 0.25 once, then
    loops: read a frame from camera 0, run the model and set steering from
    the first output. The loop never terminates on its own.
    """
    # Drive button
    # NOTE(review): the pin configured as input is `recbtn` while the event
    # detection is registered on `gobtn` - confirm this is intended.
    GPIO.setmode(GPIO.BOARD)
    GPIO.setup(recbtn, GPIO.IN)
    GPIO.add_event_detect(
        gobtn, GPIO.FALLING, callback=btn_thrd, bouncetime=200)

    # Left Camera
    camera0 = nano.Camera(device_id=0, flip=2, width=224, height=224, fps=60)
    # Right Camera (opened but not read in this loop)
    camera1 = nano.Camera(device_id=1, flip=2, width=224, height=224, fps=60)

    CATEGORIES = ['apex']
    device = torch.device('cuda')

    net = torchvision.models.resnet18(pretrained=False)
    net.fc = torch.nn.Linear(512, 2 * len(CATEGORIES))
    net = net.cuda().eval().half()
    net.load_state_dict(torch.load('data/model.pth'))

    sample = torch.zeros((1, 3, 224, 224)).cuda().half()
    converted = torch2trt(net, [sample], fp16_mode=True)
    torch.save(converted.state_dict(), 'road_following_model_trt.pth')

    # Reload from disk so inference uses exactly what was saved.
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load('road_following_model_trt.pth'))

    car = NvidiaRacecar()
    STEERING_GAIN = -0.75
    STEERING_BIAS = 0.00
    car.throttle = 0.25

    cnt = 0
    while True:
        frame = preprocess(camera0.read()).half()
        output = model_trt(frame).detach().cpu().numpy().flatten()
        x = float(output[0])
        car.steering = x * STEERING_GAIN + STEERING_BIAS
        print(str(cnt) + ":" + str(x) + ":")
        cnt += 1
class TRTOpenReIDEncoder(Encoder):
    """Encoder that embeds detection crops with a TensorRT module."""

    def __init__(self, trt_checkpoint_path: str,
                 img_size: Tuple[int, int] = (128, 256),
                 max_batch_size: int = 8, **kwargs):
        """Load the TensorRT module and prepare the crop transform.

        Args:
            trt_checkpoint_path: Path of the saved ``TRTModule`` state dict.
            img_size: Size passed to ``cv2.resize`` for every crop.
            max_batch_size: Maximum number of crops per forward pass.
            **kwargs: Forwarded to the base class constructor.
        """
        super().__init__(**kwargs)

        engine = TRTModule()
        engine.load_state_dict(torch.load(trt_checkpoint_path))
        self.model_trt = engine.cuda().eval()

        self.size = img_size
        self.max_batch_size = max_batch_size
        self.transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
        ])

    def _preprocess(self, im_crops):
        """Scale crops to [0, 1], resize, normalise and stack into a batch."""
        tensors = []
        for im in im_crops:
            resized = cv2.resize(im.astype(np.float32) / 255., self.size)
            tensors.append(self.transform(resized).unsqueeze(0))
        return torch.cat(tensors, dim=0).float()

    def encode(self, detections: List[Detection],
               full_img: np.ndarray) -> List[object]:
        """Return one embedding per detection, or ``[]`` when there are none.

        Crops are taken from the RGB-converted image using each detection's
        ``box``; an empty crop is replaced by a 10x10 white patch so that
        every detection still yields an output row.
        """
        if len(detections) == 0:
            return []

        rgb_img = cv2.cvtColor(full_img, cv2.COLOR_BGR2RGB)
        crops = []
        for detection in detections:
            box = detection.box
            crop = rgb_img[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
            if crop.shape[0] * crop.shape[1] > 0:
                crops.append(crop)
            else:
                crops.append(np.ones((10, 10, 3)).astype(np.float32) * 255)

        # Feed the module in chunks of at most max_batch_size crops.
        chunk_outputs = []
        for start in range(0, len(crops), self.max_batch_size):
            chunk = crops[start:start + self.max_batch_size]
            batch = self._preprocess(chunk).cuda()
            chunk_outputs.append(self.model_trt(batch))

        stacked = torch.cat(chunk_outputs, dim=0)
        return stacked.cpu().detach().numpy()
class TensorRTModel(object):
    """Numpy-in / numpy-out wrapper around a saved ``TRTModule``."""

    def __init__(self, model_name, model_path):
        """Pick the device and load the TensorRT module.

        Args:
            model_name: Stored as ``self.name``.
            model_path: Path of the saved ``TRTModule`` state dict.
        """
        # 1. set device
        self.device = 'cpu'
        if torch.cuda.device_count() > 0:
            self.device = 'cuda:0'
        else:
            logger.error('TensorRT not working with CPU')
        logger.warning('Torch device {}.'.format(self.device))

        self.name = model_name
        self.model = TRTModule()
        logger.info("Start loading TensorRT module, it's slow")
        self.model.load_state_dict(torch.load(model_path))

    def tensor_to_numpy(self, torch_tensor):
        """Return a tuple of numpy arrays for a tensor or tuple of tensors."""
        if isinstance(torch_tensor, tuple):
            return tuple(t.cpu().numpy() for t in torch_tensor)
        return (torch_tensor.cpu().numpy(),)

    def numpy_to_tensor(self, np_arr):
        """Return a tuple of tensors on ``self.device``.

        Args:
            np_arr: A tuple of numpy arrays, a single numpy array, or a
                single tensor.

        Raises:
            TypeError: For any other input type.
        """
        # Local import: this block cannot assume a file-level numpy alias.
        import numpy as np

        if isinstance(np_arr, tuple):
            return tuple(
                torch.from_numpy(na).to(self.device) for na in np_arr)
        if isinstance(np_arr, np.ndarray):
            # The original tested for torch.Tensor here and then referenced
            # an undefined name, so a single array could never be converted.
            return (torch.from_numpy(np_arr).to(self.device),)
        if isinstance(np_arr, torch.Tensor):
            return (np_arr.to(self.device),)
        raise TypeError(
            'expected a numpy array, a tensor or a tuple of numpy arrays, '
            'got {}'.format(type(np_arr).__name__))

    def model_forward(self, args):
        """Call the module with ``args`` unpacked, without gradients."""
        with torch.no_grad():
            output_tensor = self.model(*args)
        return output_tensor

    def forward(self, *args):
        """Run the module on numpy inputs and return numpy outputs."""
        input_torch_tensor = self.numpy_to_tensor(args)
        output_torch_tensor = self.model_forward(input_torch_tensor)
        return self.tensor_to_numpy(output_torch_tensor)
def inference_with_torch2trt(trt_file_path, data_path):
    """Run the TensorRT module on one image ten times and time it.

    Each timed region covers the forward pass plus copying the first output
    to a numpy array on the host.

    Args:
        trt_file_path: Path of the saved ``TRTModule`` state dict.
        data_path: Path of the image passed to ``cv2.imread``.

    Returns:
        ``(time_torch, torch_outputs)``: the time in seconds summed over
        the ten runs, and the outputs of the last run.
    """
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(trt_file_path))

    time_torch = 0
    for _ in range(10):
        frame = cv2.imread(data_path)
        img = preprocessing(frame).cuda()
        img = img.unsqueeze(0)

        t3 = time.time()
        with torch.no_grad():
            torch_outputs = model_trt(img)
        # Pull the result to the host so the copy is part of the timing.
        torch_outputs[0].data.cpu().numpy()
        t4 = time.time()

        # Accumulate across runs. The original overwrote the total with the
        # current run and then doubled it, so it reported 2x the last run.
        time_torch += t4 - t3

    print("Inference time with torch2trt = %.3f ms" % (time_torch * 1000))
    return time_torch, torch_outputs
def export_siamfcpp_fea_trt(task_cfg, parsed_args):
    """Export phase "feature" (basemodel/c_z_k/r_z_k) to a trt model.

    Builds the "track" model from ``task_cfg.model``, converts it with
    torch2trt on a random ``(1, 3, 127, 127)`` CUDA input, saves the
    converted state dict to ``parsed_args.output + "_fea.trt"``, then
    reloads it and checks that its first output matches the PyTorch output
    (``rtol=1e-03``, ``atol=1e-05``).
    """
    model = model_builder.build("track", task_cfg.model)
    model = model.eval().cuda()
    model.phase = "feature"

    sample = torch.randn(1, 3, 127, 127).cuda()
    # Reference output from the PyTorch model for the later comparison.
    fea = model(sample)

    output_path = parsed_args.output + "_fea.trt"
    logger.info("start cvt pytorch model")
    converted = torch2trt(model, [sample])
    logger.info("save trt model to {}".format(output_path))
    torch.save(converted.state_dict(), output_path)

    # Round-trip through disk so the saved file itself is what gets verified.
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(output_path))
    trt_out = reloaded(sample)

    np.testing.assert_allclose(
        to_numpy(fea[0]),
        to_numpy(trt_out[0]),
        rtol=1e-03,
        atol=1e-05,
    )
    logger.info("test accuracy ok")
def test_arcface_mb(bs):
    """Time batched ArcFace feature extraction and print a cosine distance.

    Loads the FP16 TensorRT module built for batch size ``bs``, extracts
    features for a batch of ``bs`` copies of the first test image (printing
    the elapsed time and feature shape), then does the same ten times for
    the second image, printing the average time and the cosine distance
    between the first rows of the two results.

    Args:
        bs: Batch size; selects the checkpoint file and the batch length.
    """
    model_weights = f"/workspace/pretrained_models/torch/arcface_50_b{bs}_fp16.pth"
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(model_weights))

    img0 = cv2.imread("/app/images/test/0.jpg")
    img1 = cv2.imread("/app/images/test/1.jpg")

    # Single, first-call extraction.
    started = time.time()
    feat0 = extract_feature_batch([img0] * bs, model_trt, batch_size=bs)
    print("inf. time: ", time.time() - started)
    print(feat0.shape)

    # Averaged over repeated extractions.
    test_num = 10
    started = time.time()
    for _ in range(test_num):
        feat1 = extract_feature_batch([img1] * bs, model_trt, batch_size=bs)
    print("inf. time: ", (time.time() - started) / test_num)

    dst = cosineDistance(feat0.cpu().numpy()[0], feat1.cpu().numpy()[0])
    print("cos dst: ", dst)
def load_model_and_run():
    """Convert the pose model to TensorRT, benchmark it and open the camera.

    Loads the ResNet-18 pose weights, converts the model with torch2trt
    (FP16), saves and reloads the converted module, runs it 50 times on a
    zero input and prints the resulting rate, then opens a CSI camera.

    Returns:
        ``(ut, camera, model_trt)``: the ``Utils`` helper built from the
        topology, the camera and the reloaded TensorRT module.
    """
    with open('human_pose.json', 'r') as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    ut = Utils(topology)

    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])
    base_model = trt_pose.models.resnet18_baseline_att(
        num_parts, 2 * num_links).cuda().eval()

    MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    base_model.load_state_dict(torch.load(MODEL_WEIGHTS))

    WIDTH = 224
    HEIGHT = 224
    data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

    converted = torch2trt.torch2trt(
        base_model, [data], fp16_mode=True, max_workspace_size=1 << 25)
    OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    torch.save(converted.state_dict(), OPTIMIZED_MODEL)

    # Reload from disk so the benchmark uses exactly what was saved.
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

    # Benchmark: 50 forward passes, synchronising before reading the clock
    # at the end so queued GPU work is included.
    t0 = time.time()
    torch.cuda.current_stream().synchronize()
    for _ in range(50):
        model_trt(data)
    torch.cuda.current_stream().synchronize()
    t1 = time.time()
    print(50.0 / (t1 - t0))

    # NOTE(review): mean/std are created but not used or returned here.
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()

    # A USBCamera with the same arguments can be used instead of the CSI one.
    camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=15)
    return ut, camera, model_trt
def init():
    """Populate the module-level pose-estimation globals.

    Sets ``topology``, ``WIDTH``, ``HEIGHT`` (both 256), ``model_trt``
    (loaded from the DenseNet-121 ``*_trt.pth`` file under ``./models``),
    ``mean``, ``std``, ``device``, ``parse_objects`` and ``draw_objects``.
    """
    global topology, WIDTH, HEIGHT, model_trt
    global mean, std, device, parse_objects, draw_objects

    import torch2trt
    from torch2trt import TRTModule

    with open('./models/human_pose.json', 'r') as f:
        human_pose = json.load(f)
    topology = coco_category_to_topology(human_pose)

    WIDTH = 256
    HEIGHT = 256

    OPTIMIZED_MODEL = Path(
        './models/densenet121_baseline_att_256x256_B_epoch_160_trt.pth')
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))
    print('loaded model')

    # Per-channel normalisation constants, kept on the GPU.
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    device = torch.device('cuda')

    parse_objects = ParseObjects(topology)
    draw_objects = DrawObjects(topology)