示例#1
0
def init_inference():
    """Build the global DQN network and prepare it for inference.

    Behaviour is driven by the module-level ``args``:

    * ``args.trt_module`` false: load ``args.pretrained_model`` into the
      PyTorch network and move it to the global ``device``.
    * ``args.trt_module`` and ``args.trt_conversion`` true: convert the
      PyTorch network with torch2trt, save the engine state dict to
      ``args.trt_model`` and terminate the process via ``exit()``.
    * ``args.trt_module`` true only: load the engine stored at
      ``args.trt_model`` and move it to the global ``device``.

    The resulting network is stored in the module-level ``model``.
    """
    global model
    global device

    model = DQN(120, 320, DISCRETIZATION)
    model.eval()

    if not args.trt_module:
        # Plain PyTorch path.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.to(device)
        return

    from torch2trt import TRTModule

    if args.trt_conversion:
        # One-off conversion run: build the engine, persist it, then quit.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.cuda()
        sample_input = torch.ones((1, 3, 120, 320)).cuda()
        from torch2trt import torch2trt
        converted = torch2trt(model, [sample_input],
                              max_batch_size=100,
                              fp16_mode=True)
        torch.save(converted.state_dict(), args.trt_model)
        exit()

    # Inference with a previously converted engine.
    engine = TRTModule()
    engine.load_state_dict(torch.load(args.trt_model))
    model = engine.to(device)
示例#2
0
    def __init__(
        self,
        model,
        exp,
        cls_names=COCO_CLASSES,
        trt_file=None,
        decoder=None,
        device="cpu",
    ):
        """Store the model and the detection settings taken from ``exp``.

        Args:
            model: network used for inference; replaced by the TensorRT
                engine when ``trt_file`` is given.
            exp: experiment object providing ``num_classes``, ``test_conf``,
                ``nmsthre`` and ``test_size``.
            cls_names: class names, stored as ``self.cls_names``.
            trt_file: optional path to a saved ``TRTModule`` state dict.
            decoder: stored as ``self.decoder``.
            device: stored as ``self.device``.
        """
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.device = device

        # Settings copied from the experiment description.
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size

        if trt_file is not None:
            from torch2trt import TRTModule

            engine = TRTModule()
            engine.load_state_dict(torch.load(trt_file))

            # Run the original model once on a dummy CUDA batch before it is
            # swapped out; the result is discarded.
            rows, cols = exp.test_size[0], exp.test_size[1]
            dummy = torch.ones(1, 3, rows, cols).cuda()
            self.model(dummy)
            self.model = engine

        # Per-channel constants (presumably ImageNet statistics - confirm).
        self.rgb_means = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)
示例#3
0
def export_siamfcpp_track_fea_trt(task_cfg, parsed_args):
    """Export phase "freeze_track_fea" (basemodel/c_x/r_x) to a TensorRT model.

    The model built from ``task_cfg.model`` is converted with torch2trt, its
    state dict is saved to ``parsed_args.output + "_track_fea.trt"``, and the
    saved engine is reloaded and compared against the PyTorch outputs on a
    random 1x3x303x303 input.

    Raises:
        AssertionError: if either of the first two outputs differs beyond
            ``rtol=1e-03`` / ``atol=1e-05``.
    """
    model = model_builder.build("track", task_cfg.model)
    model.eval().cuda()
    model.phase = "freeze_track_fea"

    search_im = torch.randn(1, 3, 303, 303).cuda()
    reference_outs = model(search_im)

    output_path = parsed_args.output + "_track_fea.trt"
    logger.info("start cvt pytorch model")
    converted = torch2trt(model, [search_im])
    torch.save(converted.state_dict(), output_path)
    logger.info("save trt model to {}".format(output_path))

    # Reload from disk so the check covers the serialized engine.
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(output_path))
    trt_outs = reloaded(search_im)

    for out_idx in (0, 1):
        np.testing.assert_allclose(to_numpy(reference_outs[out_idx]),
                                   to_numpy(trt_outs[out_idx]),
                                   rtol=1e-03,
                                   atol=1e-05)
    logger.info("test accuracy ok")
示例#4
0
def load_trt_model(model_path):
    """Return a ``TRTModule`` restored from the state dict at ``model_path``."""
    from torch2trt import TRTModule

    print("Loading TensorRT optimized model")
    engine = TRTModule()
    weights = torch.load(model_path)
    engine.load_state_dict(weights)
    return engine
示例#5
0
    def __init__(self,
                 model,
                 exp,
                 trt_file=None,
                 decoder=None,
                 device=torch.device("cpu"),
                 fp16=False):
        """Store the model and the detection settings taken from ``exp``.

        Args:
            model: network used for inference; replaced by the TensorRT
                engine when ``trt_file`` is given.
            exp: experiment object providing ``num_classes``, ``test_conf``,
                ``nmsthre`` and ``test_size``.
            trt_file: optional path to a saved ``TRTModule`` state dict.
            decoder: stored as ``self.decoder``.
            device: stored as ``self.device``; also used for the dummy batch.
            fp16: stored as ``self.fp16``.
        """
        self.model = model
        self.decoder = decoder
        self.device = device
        self.fp16 = fp16

        # Settings copied from the experiment description.
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size

        if trt_file is not None:
            from torch2trt import TRTModule

            engine = TRTModule()
            engine.load_state_dict(torch.load(trt_file))

            # Run the original model once on a dummy batch before it is
            # swapped out; the result is discarded.
            rows, cols = exp.test_size[0], exp.test_size[1]
            dummy = torch.ones((1, 3, rows, cols), device=device)
            self.model(dummy)
            self.model = engine

        # Per-channel constants (presumably ImageNet statistics - confirm).
        self.rgb_means = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)
示例#6
0
文件: demo.py 项目: CosmosHua/GLD
    def __init__(
        self,
        model,
        exp,
        cls_names=COCO_CLASSES,
        trt_file=None,
        decoder=None,
        device="cpu",
        fp16=False,
        legacy=False,
    ):
        """Store the model, detection settings and the pre-processing step.

        Args:
            model: network used for inference; replaced by the TensorRT
                engine when ``trt_file`` is given.
            exp: experiment object providing ``num_classes``, ``test_conf``,
                ``nmsthre`` and ``test_size``.
            cls_names: class names, stored as ``self.cls_names``.
            trt_file: optional path to a saved ``TRTModule`` state dict.
            decoder: stored as ``self.decoder``.
            device: stored as ``self.device``.
            fp16: stored as ``self.fp16``.
            legacy: forwarded to ``ValTransform``.
        """
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.device = device
        self.fp16 = fp16

        # Settings copied from the experiment description.
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size

        self.preproc = ValTransform(legacy=legacy)

        if trt_file is not None:
            from torch2trt import TRTModule

            engine = TRTModule()
            engine.load_state_dict(torch.load(trt_file))

            # Run the original model once on a dummy CUDA batch before it is
            # swapped out; the result is discarded.
            rows, cols = exp.test_size[0], exp.test_size[1]
            dummy = torch.ones(1, 3, rows, cols).cuda()
            self.model(dummy)
            self.model = engine
示例#7
0
def init_inference():
    """Build the global network selected by ``args.model`` for inference.

    Supported values of ``args.model`` are ``'resnet18'`` (with a 3-output
    final layer), ``'samplenet'`` and ``'simplenet'``.

    Behaviour is then driven by the module-level ``args``:

    * ``args.trt_module`` false: load ``args.pretrained_model`` into the
      PyTorch network and move it to the global ``device``.
    * ``args.trt_module`` and ``args.trt_conversion`` true: convert the
      PyTorch network with torch2trt, save the engine state dict to
      ``args.trt_model`` and terminate the process via ``exit()``.
    * ``args.trt_module`` true only: load the engine stored at
      ``args.trt_model`` and move it to the global ``device``.

    Raises:
        NotImplementedError: if ``args.model`` is not one of the above.
    """
    global model
    global device

    arch = args.model
    if arch == 'resnet18':
        model = models.resnet18()
        model.fc = torch.nn.Linear(512, 3)
    elif arch == 'samplenet':
        model = SampleNet()
    elif arch == 'simplenet':
        model = SimpleNet()
    else:
        raise NotImplementedError()
    model.eval()

    if not args.trt_module:
        # Plain PyTorch path.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.to(device)
        return

    from torch2trt import TRTModule

    if args.trt_conversion:
        # One-off conversion run: build the engine, persist it, then quit.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.cuda()
        sample_input = torch.ones((1, 3, 240, 320)).cuda()
        from torch2trt import torch2trt
        converted = torch2trt(model, [sample_input],
                              max_batch_size=100,
                              fp16_mode=True)
        torch.save(converted.state_dict(), args.trt_model)
        exit()

    # Inference with a previously converted engine.
    engine = TRTModule()
    engine.load_state_dict(torch.load(args.trt_model))
    model = engine.to(device)
示例#8
0
class ResDownS(nn.Module):
    """1x1 conv + batch-norm channel reducer with optional TensorRT engines.

    ``downsample`` is the PyTorch module. ``downsample_15`` and
    ``downsample_31`` start as aliases of it and are replaced by TensorRT
    engines (built for 15x15 and 31x31 inputs) once ``init_trt`` is called.
    """

    def __init__(self, inplane, outplane):
        super(ResDownS, self).__init__()
        self.downsample = nn.Sequential(
                nn.Conv2d(inplane, outplane, kernel_size=1, bias=False),
                nn.BatchNorm2d(outplane))
        shared = self.downsample
        self.downsample_15 = shared
        self.downsample_31 = shared

    def init_trt(self, fp16_mode, trt_weights_path):
        """Build the two TensorRT engines, or load them if already saved.

        The presence of the 15x15 engine file alone decides between
        building and loading.
        """
        file_15 = trt_weights_path+'/downsample_15_trt.pth'
        file_31 = trt_weights_path+'/downsample_31_trt.pth'

        if path.exists(file_15):
            self.downsample_15 = TRTModule()
            self.downsample_15.load_state_dict(torch.load(file_15))
            self.downsample_31 = TRTModule()
            self.downsample_31.load_state_dict(torch.load(file_31))
            return

        sample_15 = torch.ones((1, 1024, 15, 15)).cuda()
        sample_31 = torch.ones((1, 1024, 31, 31)).cuda()
        self.downsample_15 = torch2trt(self.downsample, [sample_15], fp16_mode=fp16_mode)
        self.downsample_31 = torch2trt(self.downsample, [sample_31], fp16_mode=fp16_mode)
        torch.save(self.downsample_15.state_dict(), file_15)
        torch.save(self.downsample_31.state_dict(), file_31)

    def forward(self, x):
        """Apply the branch matching the input's last dimension, then crop.

        Outputs whose dimension 3 is smaller than 20 lose a 4-element
        border on dimensions 2 and 3.
        """
        last_dim = x.shape[-1]
        if last_dim == 15:
            branch = self.downsample_15
        elif last_dim == 31:
            branch = self.downsample_31
        else:
            branch = self.downsample
        out = branch(x)

        if out.size(3) < 20:
            margin = 4
            out = out[:, :, margin:-margin, margin:-margin]
        return out
示例#9
0
def build_tensorrt(trt_file, model, size, device, recompile=False, fp16=True):
    """Return a TensorRT version of ``model``, reusing ``trt_file`` if possible.

    Args:
        trt_file: path of the saved engine state dict (read and/or written).
        model: PyTorch model to convert when no usable file exists.
        size: two-element sequence; the sample input is built as
            ``(1, 3, size[1], size[0])``.
        device: device the sample input is moved to.
        recompile: when true, ignore an existing ``trt_file`` and rebuild.
        fp16: forwarded to torch2trt as ``fp16_mode``.

    Returns:
        The loaded ``TRTModule`` or the freshly converted engine.
    """
    from torch2trt import torch2trt, TRTModule
    import tensorrt as trt

    rows = int(size[1])
    cols = int(size[0])
    sample = torch.ones(1, 3, rows, cols).to(device)

    use_saved = path.isfile(trt_file) and not recompile
    if not use_saved:
        print("Compiling with tensorRT...")
        compiled = torch2trt(model, [sample],
                             max_workspace_size=1 << 27,
                             fp16_mode=fp16,
                             log_level=trt.Logger.INFO,
                             strict_type_constraints=True,
                             max_batch_size=1)
        torch.save(compiled.state_dict(), trt_file)
        return compiled

    print("Found TensorRT model file, loading...")
    restored = TRTModule()
    restored.load_state_dict(torch.load(trt_file))

    # Run once on the sample input; the output is discarded.
    restored(sample)
    return restored
示例#10
0
def process_images(images: list, trt: bool):
    """Classify each image, stamp the label on it and save it to ./output/.

    Args:
        images: image objects exposing ``filename``, ``width``, ``height``
            and ``save`` (they are drawn on via ``ImageDraw.Draw``).
        trt: when true use the TensorRT engine stored in
            ``alexnet_trt.pth``; otherwise the pretrained PyTorch AlexNet.

    Prints the model load time, the total processing time and the CUDA
    memory statistics.
    """
    load_started = time.time()
    if not trt:
        model = alexnet(pretrained=True).eval().cuda()
    else:
        model = TRTModule()
        model.load_state_dict(torch.load('alexnet_trt.pth'))
    print("Model load time {}".format(time.time() - load_started))

    loop_started = time.time()
    for image in images:
        index = classify_image(image, model)
        output_text = str(index) + ': ' + classes[index]

        # White banner along the bottom edge, with the label drawn on it.
        canvas = ImageDraw.Draw(image)
        banner_top = image.height - 20
        canvas.rectangle((0, banner_top, image.width, image.height),
                         fill=(255, 255, 255))
        canvas.text((50, image.height - 15),
                    output_text, (0, 0, 0),
                    font=ImageFont.load_default())

        file_name = image.filename.split('/')[-1]
        image.save('./output/' + file_name)

    print("Image(s) processing time {}".format(time.time() - loop_started))
    print('Memory allocated: ' + str(torch.cuda.memory_allocated()))
    print('Max memory allocated: ' + str(torch.cuda.max_memory_allocated()))
示例#11
0
class AntiSpoofPredict(Detection):
    """Anti-spoofing classifier that runs through a cached TensorRT engine.

    If the engine cache file exists it is loaded directly and the PyTorch
    model is never built. Otherwise the PyTorch model is loaded from
    ``weights_path`` and converted (and cached) on the first ``predict``
    call, using that call's input as the sample.
    """

    # Engine cache file, resolved relative to the current working directory.
    _TRT_CACHE_PATH = 'trt_spoof.pth'
    # Key prefix present in checkpoints saved from a wrapped model.
    _WRAPPER_PREFIX = 'module.'

    def __init__(self, device_id, weights_path):
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device("cuda:{}".format(device_id) if torch.cuda.
                                   is_available() else "cpu")
        # Defined up front so both attributes always exist, whichever
        # branch ``_load_model`` takes.
        self.model = None
        self.model_trt = None
        self._load_model(weights_path)

    def _load_model(self, model_path):
        """Load the cached engine, or else the PyTorch model at ``model_path``."""
        if os.path.isfile(self._TRT_CACHE_PATH):
            self.model_trt = TRTModule()
            self.model_trt.load_state_dict(torch.load(self._TRT_CACHE_PATH))
            return None

        # Define the model from the parameters encoded in the file name.
        model_name = os.path.basename(model_path)
        h_input, w_input, model_type, _ = parse_model_name(model_name)
        self.kernel_size = get_kernel(
            h_input,
            w_input,
        )
        self.model = MODEL_MAPPING[model_type](
            conv6_kernel=self.kernel_size).to(self.device)

        # Load the weights, dropping a leading "module." from the keys that
        # carry it. Only a true prefix is stripped, and each key is checked
        # on its own, so unprefixed keys are never truncated.
        from collections import OrderedDict
        state_dict = torch.load(model_path, map_location=self.device)
        prefix = self._WRAPPER_PREFIX
        cleaned = OrderedDict(
            (key[len(prefix):] if key.startswith(prefix) else key, value)
            for key, value in state_dict.items())
        self.model.load_state_dict(cleaned)
        self.model.eval()
        return None

    def predict(self, img):
        """Return the softmax scores for ``img`` as a numpy array.

        On the first call without a cached engine, the PyTorch model is
        converted with torch2trt (fp16), saved to the cache file and then
        released.
        """
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)
        if self.model_trt is None:
            self.model_trt = torch2trt(self.model, [img], fp16_mode=True)
            torch.save(self.model_trt.state_dict(), self._TRT_CACHE_PATH)
            self.model = None
        with torch.no_grad():
            result = self.model_trt(img)
            # dim=1 is what the deprecated implicit form picks for 2-D and
            # >=4-D inputs; assumes the output is (batch, classes) - confirm.
            result = F.softmax(result, dim=1).cpu().numpy()
        return result
示例#12
0
    def __init__(self, modelFile, taskDescFile, csv=0, csvPath='.'):
        """Load the pose model (as a TensorRT engine) and its task description.

        Args:
            modelFile: path to the PyTorch weights; its base name selects the
                model function and the input size.
            taskDescFile: JSON file with ``keypoints`` and ``skeleton``.
            csv: CSV output is initialised when this is greater than 0.
            csvPath: passed to ``_initCsv`` together with the keypoint names.

        Raises:
            PoseCaptureDescError: the task description cannot be opened.
            PoseCaptureModelError: the model name is invalid or names an
                unknown function in ``trt_pose.models``.
            PoseCaptureCsvError: CSV initialisation failed with ``OSError``.
        """
        # Load the task description
        try:
            with open(taskDescFile, 'r') as f:
                human_pose = json.load(f)
        except OSError as err:
            raise PoseCaptureDescError from err
        topology = trt_pose.coco.coco_category_to_topology(human_pose)
        num_parts = len(human_pose['keypoints'])
        num_links = len(human_pose['skeleton'])

        # Load the base model
        fbase = os.path.basename(modelFile)
        func, self.inWidth, self.inHeight = \
            PoseCaptureModel.getModelFuncName(fbase)
        if func is None:
            logging.fatal('Invalid model name: %s' % (fbase))
            logging.fatal('Model name should be (.+_.+_att)_(\\d+)x(\\d+)_')
            raise PoseCaptureModelError('Invalid model name: %s' % (fbase))
        if not hasattr(trt_pose.models, func):
            logging.fatal('Could not find base model function: %s' % (func))
            raise PoseCaptureModelError( \
                'Could not find base model function: %s' % (func))
        # Resolve the constructor by attribute lookup rather than eval() on
        # a string assembled from a file name.
        model_factory = getattr(trt_pose.models, func)
        # The engine file is looked up by base name, i.e. relative to the
        # current working directory.
        trtFile = os.path.splitext(fbase)[0] + '_trt.pth'
        logging.info('Loading base model from %s' % ('trt_pose.models.' + func))
        model = model_factory(num_parts, 2 * num_links).cuda().eval()

        if os.path.exists(trtFile):
            logging.info('Loading model from TensorRT plan file ...')
            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trtFile))
        else:
            logging.info('Optimizing model for TensorRT ...')
            model.load_state_dict(torch.load(modelFile))
            data = torch.zeros((1, 3, self.inHeight, self.inWidth)).cuda()
            model_trt = torch2trt.torch2trt( \
                model, [data], fp16_mode=True, max_workspace_size=1<<25)
            torch.save(model_trt.state_dict(), trtFile)

        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
        self.device = torch.device('cuda')

        self.parse_objects = ParseObjects(topology)
        self.draw_objects = DrawObjects(topology)
        self.model_trt = model_trt
        self.num_parts = num_parts
        self.csv = csv
        self.count = 0

        if self.csv > 0:
            try:
                self._initCsv(human_pose['keypoints'], csvPath)
            except OSError as err:
                raise PoseCaptureCsvError from err
示例#13
0
class ResDown(MultiStageFeature):
    """ResNet-50 feature extractor followed by a ``ResDownS`` reducer.

    ``features_127`` and ``features_255`` start as aliases of ``features``
    and are replaced by TensorRT engines (built for 127x127 and 255x255
    inputs) once ``init_trt`` is called.
    """

    def __init__(self, pretrain=False):
        super(ResDown, self).__init__()
        self.features = resnet50(layer3=True, layer4=False)
        shared = self.features
        self.features_127 = shared
        self.features_255 = shared
        if pretrain:
            load_pretrain(self.features, 'resnet.model')

        self.downsample = ResDownS(1024, 256)

        self.layers = [self.downsample, self.features.layer2, self.features.layer3]
        self.train_nums = [1, 3]
        self.change_point = [0, 0.5]

        self.unfix(0.0)

    def init_trt(self, fp16_mode, trt_weights_path):
        """Build the two backbone engines, or load them if already saved.

        The presence of the 127x127 engine file alone decides between
        building and loading. The reducer's engines are initialised
        afterwards through ``self.downsample.init_trt``.
        """
        file_127 = trt_weights_path+'/features_127_trt.pth'
        file_255 = trt_weights_path+'/features_255_trt.pth'

        if path.exists(file_127):
            self.features_127 = TRTModule()
            self.features_255 = TRTModule()
            self.features_127.load_state_dict(torch.load(file_127))
            self.features_255.load_state_dict(torch.load(file_255))
        else:
            sample_127 = torch.ones((1, 3, 127, 127)).cuda()
            sample_255 = torch.ones((1, 3, 255, 255)).cuda()
            self.features_127 = torch2trt(self.features, [sample_127], fp16_mode=fp16_mode)
            self.features_255 = torch2trt(self.features, [sample_255], fp16_mode=fp16_mode)
            torch.save(self.features_127.state_dict(), file_127)
            torch.save(self.features_255.state_dict(), file_255)

        self.downsample.init_trt(fp16_mode, trt_weights_path)

    def param_groups(self, start_lr, feature_mult=1):
        """Return optimizer parameter groups for the trainable parameters.

        The reducer uses ``start_lr * feature_mult``; the backbone uses a
        tenth of that. Modules without trainable parameters are omitted.
        """
        base_lr = start_lr * feature_mult

        def _group(module, mult=1):
            trainable = [p for p in module.parameters() if p.requires_grad]
            if not trainable:
                return []
            return [{'params': trainable, 'lr': base_lr * mult}]

        return _group(self.downsample) + _group(self.features, 0.1)

    def forward(self, x):
        """Return the reduced last feature map from the 127 branch."""
        stages = self.features_127(x)
        return self.downsample(stages[-1])

    def forward_all(self, x):
        """Return all 255-branch features plus the reduced last feature map."""
        stages = self.features_255(x)
        reduced = self.downsample(stages[-1])
        return stages, reduced
示例#14
0
class run:
    """Webcam pose-estimation demo driven by a TensorRT trt_pose engine.

    ``--model`` selects the engine: any value containing ``resnet`` uses the
    224x224 ResNet-18 engine, anything else the 256x256 DenseNet-121 engine.
    """

    def __init__(self):
        self.parser = argparse.ArgumentParser(description='TensorRT pose estimation run')
        self.parser.add_argument('--model', type=str, default='resnet', help='resnet or densenet')
        # Use the parser owned by this instance; the bare names ``parser``
        # and ``args`` are not defined in this scope.
        self.args = self.parser.parse_args()

        with open('human_pose.json', 'r') as f:
            human_pose = json.load(f)
        self.topology = trt_pose.coco.coco_category_to_topology(human_pose)

        # Only the serialized engine is used for inference, so the PyTorch
        # network is not instantiated here.
        if 'resnet' in self.args.model:
            print('------ model = resnet--------')
            optimized_model = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
            self.width = 224
            self.height = 224
        else:
            print('------ model = densenet--------')
            optimized_model = 'densenet121_baseline_att_256x256_B_epoch_160_trt.pth'
            self.width = 256
            self.height = 256

        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load(optimized_model))

        # Built once instead of on every frame.
        self.parse_objects = ParseObjects(self.topology)
        self.draw_objects = DrawObjects(self.topology)

        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    def run(self):
        """Process up to 500 camera frames, showing each result in a window.

        Stops early on a failed camera read or when ``q`` is pressed. The
        windows, the video writer and the camera are released on exit, even
        if frame processing raises.
        """
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        # NOTE(review): the writer is opened but no frame is ever written
        # to it - confirm whether the processed frames should be recorded.
        out_video = cv2.VideoWriter('/tmp/output.mp4', fourcc, self.cap.get(cv2.CAP_PROP_FPS), (640, 480))
        count = 0
        try:
            while self.cap.isOpened() and count < 500:
                t = time.time()
                ret_val, dst = self.cap.read()
                if not ret_val:
                    print("Camera read Error")
                    break
                img = cv2.resize(dst, dsize=(self.width, self.height), interpolation=cv2.INTER_AREA)
                # ``PE`` is expected to be defined at module level.
                img = PE.execute(img, dst, t)
                cv2.imshow("result", img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                count += 1
        finally:
            cv2.destroyAllWindows()
            out_video.release()
            self.cap.release()
示例#15
0
def load_model():
    """Load AlexNet and its TensorRT counterpart from the working directory.

    Returns:
        A ``(pytorch_model, trt_model)`` tuple, restored from ``alexnet.pth``
        and ``alexnet_trt.pth`` respectively.
    """
    model_log = log('Load {} ... '.format('alexnet & tensorrt'))

    torch_net = alexnet().eval().cuda()
    torch_net.load_state_dict(torch.load('alexnet.pth'))

    trt_net = TRTModule()
    trt_net.load_state_dict(torch.load('alexnet_trt.pth'))

    model_log.end()
    return (torch_net, trt_net)
def demo_with_torch2trt(trt_file_path, data_root):
    """Run the lane-detection engine on one image ten times and show the result.

    Args:
        trt_file_path: path to the saved ``TRTModule`` state dict.
        data_root: path of the image passed to ``cv2.imread``.

    Each iteration prints the inference time, draws the detected points on
    the image, shows it in the ``result`` window and writes
    ``demo_using_torch2trt.jpg``. Pressing ``q`` stops the loop.

    Raises:
        FileNotFoundError: if ``data_root`` cannot be read as an image.
    """
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(trt_file_path))
    row_anchor = tusimple_row_anchor
    img_w, img_h = 1280, 720

    # Loop-invariant helpers, hoisted so they are built once and do not
    # count towards the measured inference time.
    col_sample = np.linspace(0, 800 - 1, 100)
    col_sample_w = col_sample[1] - col_sample[0]
    idx = (np.arange(100) + 1).reshape(-1, 1, 1)

    for _ in range(10):
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
        img_ori = cv2.imread(data_root)
        if img_ori is None:
            # cv2.imread signals failure by returning None.
            raise FileNotFoundError(
                "could not read image: {}".format(data_root))
        img = preprocessing(img_ori)
        img = img.unsqueeze(0)
        img = img.cuda()

        t1 = time.time()
        with torch.no_grad():
            out = model_trt(img)
        out_j = out[0].data.cpu().numpy()
        t2 = time.time()
        print("Inference time = %.3f ms" % ((t2 - t1) * 1000))

        # Expected position per cell: softmax over every axis-0 entry but
        # the last, weighted by the 1-based index.
        out_j = out_j[:, ::-1, :]
        prob = scipy.special.softmax(out_j[:-1, :, :], axis=0)
        loc = np.sum(prob * idx, axis=0)
        out_j = np.argmax(out_j, axis=0)
        # Cells whose arg-max is index 100 (the entry excluded from the
        # softmax above) are zeroed out.
        loc[out_j == 100] = 0
        out_j = loc

        # A distinct name for the column index avoids shadowing the outer
        # loop variable.
        for lane in range(out_j.shape[1]):
            if np.sum(out_j[:, lane] != 0) > 2:
                for k in range(out_j.shape[0]):
                    if out_j[k, lane] > 0:
                        # presumably row_anchor has 56 entries - confirm
                        ppp = (int(out_j[k, lane] * col_sample_w * img_w / 800) -
                               1,
                               int(img_h * (row_anchor[56 - 1 - k] / 288)) - 1)
                        cv2.circle(img_ori, ppp, img_w // 300, (0, 255, 0), 2)
        cv2.imshow("result", img_ori)
        cv2.imwrite("demo_using_torch2trt.jpg", img_ori)
    cv2.destroyAllWindows()
def ETRI_Initialization(path):
    """Load the skeleton topology and the four TensorRT engines under ``path``.

    Returns:
        ``(topology, parse_objects, model_skeleton, model_trt_ba,
        model_trt_ha, model_trt_hp)``.
    """
    # Load & init for skeletons
    with open('./utils/human_pose.json', 'r') as f:
        human_pose = json.load(f)

    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    parse_objects = ParseObjects(topology)

    # (progress message, engine file) in loading order.
    stages = (
        ("trtPose start",
         'resnet18_baseline_att_224x224_A_epoch_249_trt_2.pth'),
        ("body action start", 'bodyaction_TRT.pth'),
        ("hand action start", 'handaction_jc_c_TRT.pth'),
        ("headpose start", 'headpose_TRT.pth'),
    )

    engines = []
    for banner, weight_file in stages:
        print(banner)
        engine = TRTModule()
        engine.load_state_dict(torch.load(os.path.join(path, weight_file)))
        engines.append(engine)

    model_skeleton, model_trt_ba, model_trt_ha, model_trt_hp = engines
    return topology, parse_objects, model_skeleton, model_trt_ba, model_trt_ha, model_trt_hp
示例#18
0
    def process_tftrt(self, input_model, output_infer_model):
        """Return a TensorRT engine, converting ``input_model`` if needed.

        When ``output_infer_model`` already exists it is loaded as a
        ``TRTModule``. Otherwise the model at ``input_model`` is converted
        with a 1x3x224x224 sample input and the engine state dict is saved
        to ``output_infer_model``.
        """
        if not os.path.exists(output_infer_model):
            # Load the pretrained model and convert it using sample data.
            source_model = load_pytorch_saved_model(input_model)
            sample = torch.ones((1, 3, 224, 224)).cuda()
            engine = torch2trt(source_model, [sample])
            torch.save(engine.state_dict(), output_infer_model)
            return engine

        logging.info("resnet50_pytorch_trt.pth is exist")
        engine = TRTModule()
        engine.load_state_dict(torch.load(output_infer_model))
        return engine
示例#19
0
class BackendTensorRT:
  """Inference backend that runs a torch2trt engine (fp16 + int8)."""

  # ``args.calib_algo`` code -> attribute name on ``trt.CalibrationAlgoType``.
  _CALIB_ALGO_NAMES = {
      1: 'ENTROPY_CALIBRATION',
      2: 'ENTROPY_CALIBRATION_2',
      3: 'MINMAX_CALIBRATION',
  }

  def __init__(self):
    self.model = None

  def version(self):
    """Return the installed PyTorch version string."""
    return torch.__version__

  def name(self):
    """Return the backend identifier."""
    return "pytorch-tensorrt-ofa"

  def load(self, args, ds=None):
    """Load the cached engine for ``args``, or build and cache a new one.

    The cache file lives under ``pretrained/<args.model>/`` and is keyed by
    batch size, image size and chip name. It is used unless
    ``args.force_build`` is set.

    Args:
      args: options providing ``batch_size``, ``image_size``, ``chip_name``,
        ``model``, ``force_build``, ``calib_algo`` and ``calib_batch_size``.
      ds: dataset providing ``get_calibration_set()``; required only when
        an engine has to be built.

    Returns:
      ``self``.

    Raises:
      ValueError: when building and ``args.calib_algo`` is not 1, 2 or 3,
        or ``ds`` is ``None``.
    """
    prefix = 'bs%d_is%d_%s_' % (args.batch_size, args.image_size, args.chip_name)
    lib_name = 'pretrained/' + args.model + '/' + prefix + 'torch2trt.pth'
    if os.path.exists(lib_name) and not args.force_build:
      self.model = TRTModule()
      self.model.load_state_dict(torch.load(lib_name))
      self.model.eval()
    else:
      # Validate before the expensive model load, so a bad option fails
      # with a clear message instead of an unbound ``calib_algo`` later.
      algo_name = self._CALIB_ALGO_NAMES.get(args.calib_algo)
      if algo_name is None:
        raise ValueError(
            'unsupported calib_algo %r; expected one of %s'
            % (args.calib_algo, sorted(self._CALIB_ALGO_NAMES)))
      if ds is None:
        raise ValueError(
            'a dataset is required to build the TensorRT engine')

      net, _ = load_model(args)
      net = net.cuda()
      net.eval()
      input_data = torch.FloatTensor(
          np.array(ds.get_calibration_set(), np.float32)).cuda()
      calib_algo = getattr(trt.CalibrationAlgoType, algo_name)
      # 1 << 33 on chips whose name contains 'T4', 1 << 34 otherwise.
      size = 1 << (33 if 'T4' in args.chip_name else 34)
      self.model = torch2trt(
          net, [input_data],
          max_batch_size=args.batch_size,
          fp16_mode=True,
          max_workspace_size=size,
          int8_mode=True,
          int8_calib_algorithm=calib_algo,
          int8_calib_batch_size=args.calib_batch_size)
      torch.save(self.model.state_dict(), lib_name)
    log.info('model is ready')
    return self

  def predict(self, image):
    """Return the arg-max index along dimension 1 of the model output."""
    with torch.no_grad():
      output = self.model(image)
      _, output = output.max(1)
    return output
示例#20
0
class TRTPoseExtractor(BaseEstimator, TransformerMixin):
    """Transformer that turns images into trt_pose keypoint features.

    The TensorRT engine at ``model_path`` and the topology from
    ``./models/human_pose.json`` are loaded at construction time.
    """

    def __init__(
        self,
        model_path='./models/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    ):
        self.model_path = model_path
        with open('./models/human_pose.json', 'r') as f:
            self.human_pose = json.load(f)
        self.topology = trt_pose.coco.coco_category_to_topology(
            self.human_pose)
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load(self.model_path))
        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
        self.device = torch.device('cuda')
        self.parse_objects = ParseObjects(self.topology)
        self.get_keypoints = GetKeypoints(self.topology)

    def preprocess(self, image):
        """Resize a BGR image to 224x224 and return a normalised CUDA batch."""
        # NOTE(review): this also rebinds the module-level ``device``; it is
        # kept because other code may read it, but ``self.device`` holds the
        # same value - confirm and drop the global if unused.
        global device
        device = torch.device('cuda')
        image = cv2.resize(image, (224, 224))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image)
        image = transforms.functional.to_tensor(image).to(device)
        image.sub_(self.mean[:, None, None]).div_(self.std[:, None, None])
        return image[None, ...]

    def fit(self, X, y=None):
        """No-op; present for the scikit-learn transformer interface."""
        return self

    def transform(self, X):
        """Return the squeezed array of keypoint features for each item of X.

        ``X`` holds either image file paths or already-loaded images; the
        type of the first element decides which. An empty ``X`` yields an
        empty array.
        """
        if len(X) == 0:
            # Same result the loop below gives for no rows, without
            # indexing ``X[0]``.
            return np.array([]).squeeze()

        filepath = isinstance(X[0], str)
        feat_array = []
        for row in X:
            # Read image and resize for model
            image = cv2.imread(row) if filepath else row
            data = self.preprocess(image)
            cmap, paf = self.model_trt(data)
            cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
            counts, objects, peaks = self.parse_objects(cmap, paf)
            feature_vec = self.get_keypoints(image, counts, objects, peaks)
            feat_array.append(feature_vec)

        return np.array(feat_array).squeeze()
示例#21
0
def doexecute():
    """Convert the road-following model to TensorRT and drive the car forever.

    The ResNet-18 weights in ``data/model.pth`` are converted with torch2trt
    (fp16), saved to ``road_following_model_trt.pth`` and reloaded. Frames
    from the left camera then set the steering in an endless loop while the
    throttle is held at 0.5. This function does not return.
    """
    print("start")
    categories = ['apex']

    device = torch.device('cuda')
    net = torchvision.models.resnet18(pretrained=False)
    net.fc = torch.nn.Linear(512, 2 * len(categories))
    net = net.cuda().eval().half()
    net.load_state_dict(torch.load('data/model.pth'))

    print("1")

    sample = torch.zeros((1, 3, 224, 224)).cuda().half()
    converted = torch2trt(net, [sample], fp16_mode=True)
    torch.save(converted.state_dict(), 'road_following_model_trt.pth')

    print("2")

    # Reload from disk so inference runs on the serialized engine.
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load('road_following_model_trt.pth'))
    print("model load end")

    car = NvidiaRacecar()

    # Left camera
    camera0 = nano.Camera(device_id=0, flip=2, width=224, height=224, fps=60)
    # Right camera (opened but not read below)
    camera1 = nano.Camera(device_id=1, flip=2, width=224, height=224, fps=60)

    steering_gain = 0.75
    steering_bias = 0.00

    frame_idx = 0
    while True:
        frame = preprocess(camera0.read()).half()
        prediction = model_trt(frame).detach().cpu().numpy().flatten()
        steer = float(prediction[0])
        car.steering = steer * steering_gain + steering_bias
        car.throttle = 0.5
        print("{}:{}:".format(frame_idx, steer))
        frame_idx += 1
示例#22
0
def test_arcface():
    """Time ArcFace feature extraction and print the distance of two images.

    Extracts features for the first test image once and for the second
    ten times, printing the single-run time, the feature shape, the average
    time over the ten runs and the cosine distance between both features.
    """
    model_weights = "/workspace/pretrained_models/torch/arcface_50_b1.pth"
    engine = TRTModule()
    engine.load_state_dict(torch.load(model_weights))

    img0 = cv2.imread("/app/images/test/0.jpg")
    img1 = cv2.imread("/app/images/test/1.jpg")

    # Single run.
    started = time.time()
    feat0 = extract_feature(img0, engine)
    print("inf. time: ", time.time() - started)
    print(feat0.shape)

    # Averaged over repeated runs.
    test_num = 10
    started = time.time()
    for _ in range(test_num):
        feat1 = extract_feature(img1, engine)
    print("inf. time: ", (time.time() - started) / test_num)

    dst = cosineDistance(feat0.cpu().numpy(), feat1.cpu().numpy())
    print("cos dst: ", dst)
示例#23
0
def prepare():
    """Set up GPIO, cameras and the TensorRT model, then steer the car forever.

    The ResNet-18 weights in ``data/model.pth`` are converted with torch2trt
    (fp16), saved to ``road_following_model_trt.pth`` and reloaded. The
    throttle is set to 0.25 once, after which frames from the left camera
    set the steering in an endless loop. This function does not return.
    """
    # Drive ("run") button
    # NOTE(review): ``recbtn`` is configured as an input, but the event is
    # registered on ``gobtn`` - confirm which channel is intended.
    GPIO.setmode(GPIO.BOARD)
    GPIO.setup(recbtn, GPIO.IN)
    GPIO.add_event_detect(gobtn,
                          GPIO.FALLING,
                          callback=btn_thrd,
                          bouncetime=200)

    # Left camera
    camera0 = nano.Camera(device_id=0, flip=2, width=224, height=224, fps=60)
    # Right camera (opened but not read below)
    camera1 = nano.Camera(device_id=1, flip=2, width=224, height=224, fps=60)

    categories = ['apex']
    device = torch.device('cuda')
    net = torchvision.models.resnet18(pretrained=False)
    net.fc = torch.nn.Linear(512, 2 * len(categories))
    net = net.cuda().eval().half()
    net.load_state_dict(torch.load('data/model.pth'))

    sample = torch.zeros((1, 3, 224, 224)).cuda().half()
    converted = torch2trt(net, [sample], fp16_mode=True)
    torch.save(converted.state_dict(), 'road_following_model_trt.pth')

    # Reload from disk so inference runs on the serialized engine.
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load('road_following_model_trt.pth'))

    car = NvidiaRacecar()

    steering_gain = -0.75
    steering_bias = 0.00

    car.throttle = 0.25
    frame_idx = 0
    while True:
        frame = preprocess(camera0.read()).half()
        prediction = model_trt(frame).detach().cpu().numpy().flatten()
        steer = float(prediction[0])
        car.steering = steer * steering_gain + steering_bias
        print("{}:{}:".format(frame_idx, steer))
        frame_idx += 1
class TRTOpenReIDEncoder(Encoder):
    """Encoder that embeds detection crops with a TensorRT re-ID engine."""

    def __init__(self, trt_checkpoint_path: str, img_size: Tuple[int, int] = (128, 256), max_batch_size: int = 8,
                 **kwargs):
        """Load the engine from ``trt_checkpoint_path`` and set up transforms.

        ``img_size`` is passed to ``cv2.resize`` as the target size and
        ``max_batch_size`` caps how many crops are sent per engine call.
        """
        super().__init__(**kwargs)
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load(trt_checkpoint_path))
        self.model_trt = self.model_trt.cuda().eval()
        self.size = img_size
        self.max_batch_size = max_batch_size
        normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.transform = T.Compose([
            T.ToTensor(),
            normalize,
        ])

    def _preprocess(self, im_crops):
        """Scale crops to [0, 1], resize, transform and stack into a batch."""
        tensors = []
        for im in im_crops:
            scaled = cv2.resize(im.astype(np.float32) / 255., self.size)
            tensors.append(self.transform(scaled).unsqueeze(0))
        return torch.cat(tensors, dim=0).float()

    def encode(self, detections: List[Detection], full_img: np.ndarray) -> List[object]:
        """Return one embedding per detection, or ``[]`` if there are none.

        Crops are taken from the RGB-converted image using
        ``box[1]:box[3]`` for rows and ``box[0]:box[2]`` for columns. An
        empty crop is replaced by a 10x10 placeholder filled with 255.
        """
        if len(detections) == 0:
            return []

        rgb_img = cv2.cvtColor(full_img, cv2.COLOR_BGR2RGB)
        all_crops = []
        for detection in detections:
            box = detection.box
            top, bottom = int(box[1]), int(box[3])
            left, right = int(box[0]), int(box[2])
            crop = rgb_img[top:bottom, left:right]
            if crop.shape[0] * crop.shape[1] > 0:
                all_crops.append(crop)
            else:
                all_crops.append(np.ones((10, 10, 3)).astype(np.float32) * 255)

        # Feed the engine in chunks of at most ``max_batch_size`` crops.
        outputs = []
        step = self.max_batch_size
        for start in range(0, len(all_crops), step):
            chunk = all_crops[start:start + step]
            batch = self._preprocess(chunk).cuda()
            outputs.append(self.model_trt(batch))
        return torch.cat(outputs, dim=0).cpu().detach().numpy()
示例#25
0
class TensorRTModel(object):
    """NumPy-in / NumPy-out wrapper around a serialized ``TRTModule``."""

    def __init__(self, model_name, model_path):
        """Pick the torch device and load the TensorRT module.

        Args:
            model_name: Label stored on ``self.name``.
            model_path: Path of a state dict loadable by ``TRTModule``.
        """
        # 1. set device
        self.device = 'cpu'  # 'cuda:0'
        if torch.cuda.device_count() > 0:
            self.device = 'cuda:0'
        else:
            # Only logged: construction continues with device 'cpu'.
            logger.error('TensorRT not working with CPU')
        logger.warning('Torch device {}.'.format(self.device))

        self.name = model_name
        self.model = TRTModule()
        logger.info("Start loading TensorRT module, it's slow")
        self.model.load_state_dict(torch.load(model_path))

    def _to_device_tensor(self, value):
        """Return ``value`` as a tensor on ``self.device``.

        Tensors are moved as-is; anything else is handed to
        ``torch.from_numpy``, which raises ``TypeError`` for non-arrays.
        """
        if isinstance(value, torch.Tensor):
            return value.to(self.device)
        return torch.from_numpy(value).to(self.device)

    def tensor_to_numpy(self, torch_tensor):
        """Convert a tensor, or a tuple/list of tensors, to a tuple of arrays."""
        if isinstance(torch_tensor, (tuple, list)):
            return tuple(t.detach().cpu().numpy() for t in torch_tensor)
        return (torch_tensor.detach().cpu().numpy(),)

    def numpy_to_tensor(self, np_arr):
        """Convert array input(s) to a tuple of tensors on ``self.device``.

        Args:
            np_arr: A tuple/list of arrays (or tensors), or a single array
                or tensor.

        Returns:
            A tuple of tensors; a single input yields a 1-tuple, mirroring
            ``tensor_to_numpy``.

        Raises:
            TypeError: If an element is neither a tensor nor accepted by
                ``torch.from_numpy``.
        """
        if isinstance(np_arr, (tuple, list)):
            return tuple(self._to_device_tensor(item) for item in np_arr)
        # A lone input is wrapped rather than unpacked along its first axis.
        return (self._to_device_tensor(np_arr),)

    def model_forward(self, args):
        """Call the wrapped module with ``args`` unpacked, gradients disabled."""
        with torch.no_grad():
            output_tensor = self.model(*args)
        return output_tensor

    def forward(self, *args):
        """Run the module on array inputs and return a tuple of arrays."""
        input_torch_tensor = self.numpy_to_tensor(args)
        output_torch_tensor = self.model_forward(input_torch_tensor)
        numpy_tensor = self.tensor_to_numpy(output_torch_tensor)
        return numpy_tensor
def inference_with_torch2trt(trt_file_path, data_path):
    """Time 10 forward passes of a serialized ``TRTModule`` on one image.

    Args:
        trt_file_path: Path of a state dict loadable by ``TRTModule``.
        data_path: Path of the image read with ``cv2.imread``.

    Returns:
        ``(time_torch, torch_outputs)``: the total wall-clock seconds spent in
        the 10 timed passes, and the module output of the last pass.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``data_path``.
    """
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(trt_file_path))

    # The same file is used for every pass, so read it once.
    frame = cv2.imread(data_path)
    if frame is None:
        raise FileNotFoundError("Could not read image: %s" % data_path)

    time_torch = 0
    for _ in range(10):
        img = preprocessing(frame).cuda()
        img = img.unsqueeze(0)
        t3 = time.time()
        with torch.no_grad():
            torch_outputs = model_trt(img)
        # The device-to-host copy stays inside the timed region so the
        # measurement covers finished results, not just the kernel launch.
        torch_outputs[0].data.cpu().numpy()
        t4 = time.time()
        # Accumulate; previously the sum was overwritten and then doubled.
        time_torch += t4 - t3
    print("Inference time with torch2trt = %.3f ms" % (time_torch * 1000))
    return time_torch, torch_outputs
示例#27
0
def export_siamfcpp_fea_trt(task_cfg, parsed_args):
    """Export phase "feature" (basemodel/c_z_k/r_z_k) to a TensorRT model.

    Builds the "track" model from ``task_cfg.model``, converts it with
    ``torch2trt`` on a random 1x3x127x127 input, saves the converted state
    dict to ``parsed_args.output + "_fea.trt"``, then reloads the file and
    asserts that its first output matches the PyTorch model's first output.
    """
    net = model_builder.build("track", task_cfg.model)
    net = net.eval().cuda()
    net.phase = "feature"

    dummy_input = torch.randn(1, 3, 127, 127).cuda()
    reference_out = net(dummy_input)

    trt_path = parsed_args.output + "_fea.trt"
    logger.info("start cvt pytorch model")
    converted = torch2trt(net, [dummy_input])
    logger.info("save trt model to {}".format(trt_path))
    torch.save(converted.state_dict(), trt_path)

    # Validate the file on disk rather than the in-memory conversion result.
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(trt_path))
    reloaded_out = reloaded(dummy_input)
    np.testing.assert_allclose(
        to_numpy(reference_out[0]),
        to_numpy(reloaded_out[0]),
        rtol=1e-03,
        atol=1e-05,
    )
    logger.info("test accuracy ok")
示例#28
0
def test_arcface_mb(bs):
    """Time a batched ArcFace ``TRTModule`` and print a cosine distance.

    Loads the module saved for batch size ``bs``, runs it once on ``bs``
    copies of the first test image (printing the elapsed time and the feature
    shape), then ten times on ``bs`` copies of the second image (printing the
    mean time per run), and finally prints the cosine distance between the
    first feature of each image.
    """
    engine = TRTModule()
    engine.load_state_dict(
        torch.load(f"/workspace/pretrained_models/torch/arcface_50_b{bs}_fp16.pth"))
    first_img = cv2.imread("/app/images/test/0.jpg")
    second_img = cv2.imread("/app/images/test/1.jpg")

    # Single run on the first image.
    started = time.time()
    feat0 = extract_feature_batch([first_img] * bs, engine, batch_size=bs)
    print("inf. time: ", time.time() - started)
    print(feat0.shape)

    # Repeated runs on the second image, reported as a per-run average.
    test_num = 10
    started = time.time()
    for _ in range(test_num):
        feat1 = extract_feature_batch([second_img] * bs, engine, batch_size=bs)
    print("inf. time: ", (time.time() - started) / test_num)

    dst = cosineDistance(feat0.cpu().numpy()[0], feat1.cpu().numpy()[0])
    print("cos dst: ", dst)
示例#29
0
def load_model_and_run():
    """Convert the resnet18 pose model to TensorRT, benchmark it, open a camera.

    Reads ``human_pose.json``, builds ``resnet18_baseline_att`` with the
    pretrained weights, converts it with ``torch2trt`` (fp16), saves and
    reloads the converted state dict, prints the measured calls per second
    over 50 forward passes on a zero tensor, and creates a ``CSICamera``.

    Returns:
        ``(ut, camera, model_trt)``: the ``Utils`` object built from the
        topology, the camera, and the reloaded ``TRTModule``.
    """
    with open('human_pose.json', 'r') as f:
        human_pose = json.load(f)

    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    ut = Utils(topology)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])
    model = trt_pose.models.resnet18_baseline_att(num_parts,
                                                  2 * num_links).cuda().eval()
    MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    model.load_state_dict(torch.load(MODEL_WEIGHTS))
    WIDTH = 224
    HEIGHT = 224

    data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()
    model_trt = torch2trt.torch2trt(model, [data],
                                    fp16_mode=True,
                                    max_workspace_size=1 << 25)
    OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)
    # Continue with the module as loaded from disk.
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

    # Drain pending GPU work BEFORE starting the clock, so earlier
    # asynchronous work is not billed to the benchmark loop.
    torch.cuda.current_stream().synchronize()
    t0 = time.time()
    for _ in range(50):
        model_trt(data)
    # Wait for the queued passes to finish before stopping the clock.
    torch.cuda.current_stream().synchronize()
    t1 = time.time()

    print(50.0 / (t1 - t0))

    # camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=15)
    camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=15)

    return ut, camera, model_trt
示例#30
0
def init():
    """Load the TensorRT pose model and publish shared state as module globals.

    Globals assigned by the statements below: ``topology``, ``WIDTH``,
    ``HEIGHT``, ``model_trt``, ``mean``, ``std``, ``device``,
    ``parse_objects`` and ``draw_objects``.
    """

    import torch2trt
    from torch2trt import TRTModule

    # Pose description; it is turned into the topology shared by the parser
    # and drawer objects created at the end.
    with open('./models/human_pose.json', 'r') as f:
        human_pose = json.load(f)
    
    global topology
    topology = coco_category_to_topology(human_pose)

    # NOTE(review): 256x256 matches the size in the model file name below;
    # presumably the expected network input size — confirm with the consumer.
    global WIDTH
    WIDTH = 256
    global HEIGHT
    HEIGHT = 256

    #data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

    OPTIMIZED_MODEL = Path('./models/densenet121_baseline_att_256x256_B_epoch_160_trt.pth')

    # Restore the already-converted module from its saved state dict.
    global model_trt
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

    print('loaded model')

    # Three-element CUDA tensors; presumably per-channel normalization
    # constants for input preprocessing — confirm where they are consumed.
    global mean
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    global std
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    global device
    device = torch.device('cuda')

    global parse_objects
    parse_objects = ParseObjects(topology)
    global draw_objects
    draw_objects = DrawObjects(topology)