Example #1
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    for data, label in data_loader:
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
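
`topk_accuracy` is used above but not defined in this snippet; a minimal sketch of a compatible implementation, assuming `results` holds an (N, num_classes) score array and `labels` holds integer class ids:

import numpy as np

def topk_accuracy(scores, labels, k=1):
    # Indices of the k highest-scoring classes per sample.
    topk = np.argsort(scores, axis=1)[:, -k:]
    # A hit when the true label appears among the top-k predictions.
    hits = (topk == labels.reshape(-1, 1)).any(axis=1)
    return hits.mean()
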
Example #2
def _non_dist_test(model, dataset, cfg, validate=False):
    data_loader = build_dataloader(dataset,
                                   cfg.data.imgs_per_gpu,
                                   cfg.data.workers_per_gpu,
                                   len(cfg.gpus.test),
                                   dist=False,
                                   shuffle=False)

    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()
    embeddings = []
    for batch_idx, testdata in enumerate(data_loader):
        embed = model(testdata['img'], return_loss=False)
        embeddings.append(embed)

    embeddings = torch.cat(embeddings)
    # the wrapped model exposes its submodules via .module
    metric = model.module.metric_branch

    # compatibility auc
    auc = dataset.test_compatibility(embeddings, metric)

    # fill-in-blank accuracy
    acc = dataset.test_fitb(embeddings, metric)

    print('Compat AUC: {:.2f} FITB: {:.1f}\n'.format(round(auc, 2),
                                                     round(acc * 100, 1)))
Example #3
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 images and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')

        if (i + 1) == 2000:
            pure_inf_time += elapsed
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break
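
The synchronize/perf_counter bracketing above is the standard way to time CUDA inference: kernels launch asynchronously, so the timer must wait for the GPU on both sides. The same pattern as a small reusable helper (a sketch, not part of the original script):

import time

import torch

def timed_forward(model, data):
    # Wait for pending kernels so the timer starts on an idle GPU.
    torch.cuda.synchronize()
    start = time.perf_counter()
    with torch.no_grad():
        model(return_loss=False, rescale=True, **data)
    # Wait again so the measurement includes the work just launched.
    torch.cuda.synchronize()
    return time.perf_counter() - start
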
Example #4
def _non_dist_test(model, dataset, cfg, validate=False):
    data_loader = build_dataloader(dataset,
                                   cfg.data.imgs_per_gpu,
                                   cfg.data.workers_per_gpu,
                                   len(cfg.gpus.test),
                                   dist=False,
                                   shuffle=False)

    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()
    embeddings = []
    for batch_idx, testdata in enumerate(data_loader):
        embed = model(testdata['img'], return_loss=False)
        embeddings.append(embed.data.cpu().numpy())

    # collect as numpy arrays, then transfer back to a tensor;
    # this avoids keeping all embeddings in GPU memory
    embeddings = np.concatenate(embeddings)
    embeddings = torch.from_numpy(embeddings)
    metric = model.module.triplet_net.metric_branch

    # compatibility auc
    auc = dataset.test_compatibility(embeddings, metric)

    # fill-in-blank accuracy
    acc = dataset.test_fitb(embeddings, metric)

    print('Compat AUC: {:.2f} FITB: {:.1f}\n'.format(round(auc, 2),
                                                     round(acc * 100, 1)))
Example #5
def _non_dist_test(model, dataset, cfg, validate=False):
    data_loader = build_dataloader(dataset,
                                   cfg.data.imgs_per_gpu,
                                   cfg.data.workers_per_gpu,
                                   len(cfg.gpus.test),
                                   dist=False,
                                   shuffle=False)

    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    attr_calculator = AttrCalculator(cfg)

    for batch_idx, testdata in enumerate(data_loader):
        imgs = testdata['img']
        landmark = testdata['landmark']
        attr = testdata['attr']

        attr_pred = model(imgs, attr, landmark=landmark, return_loss=False)

        attr_calculator.collect_result(attr_pred, attr)

        if batch_idx % cfg.print_interval == 0:
            attr_calculator.show_result(batch_idx)

    attr_calculator.show_result()
Example #6
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.backbone.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(
        cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 videos and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(
                    f'Done video [{i + 1:<3}/ 2000], fps: {fps:.1f} video / s')

        if (i + 1) == 2000:
            pure_inf_time += elapsed
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} video / s')
            break
Example #7
def _non_dist_test(model, dataset, cfg, validate=False):
    data_loader = build_dataloader(dataset,
                                   cfg.data.imgs_per_gpu,
                                   cfg.data.workers_per_gpu,
                                   len(cfg.gpus.test),
                                   dist=False,
                                   shuffle=False)

    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    evaluator = LandmarkDetectorEvaluator(cfg.img_size, cfg.landmark_num)
    error_list, det_percent_list = [], []

    for batch_idx, testdata in enumerate(data_loader):
        img = testdata['img']
        landmark = testdata['landmark_for_regression']
        vis = testdata['vis']

        pred_vis, pred_lm = model(img, return_loss=False)
        det_error, det_lm_percent = evaluator.evaluate_landmark_detection(
            pred_vis, pred_lm, vis, landmark)
        if batch_idx % 20 == 0:
            print('Batch idx {:d}, normalized error = {:.4f}, '
                  'det. percent = {:.2f}'.format(batch_idx, det_error,
                                                 det_lm_percent))
            error_list.append(det_error)
            det_percent_list.append(det_lm_percent)

    print('Fashion Landmark Detection Normalized Error: {:.4f}, '
          'Detected Percent: {:.2f}'.format(
              sum(error_list) / len(error_list),
              sum(det_percent_list) / len(det_percent_list)))
Example #8
def modelInit(configPath, checkPointPath):
    # configure the cfg
    cfg = loadCfg(configPath)
    # single-GPU testing, to inspect the result images
    distributed = False
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # set these before the dataset and model are built so they take effect
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    # initialize the dataset
    dataset = build_dataset(cfg.data.test)
    # initialize the dataloader
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, checkPointPath, map_location='cpu')
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES
    model = MMDataParallel(model, device_ids=[0])
    model.eval()
    return model, dataset, data_loader
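
A hypothetical call of `modelInit`; the config and checkpoint paths below are placeholders:

import torch

model, dataset, data_loader = modelInit('configs/my_detector.py',
                                        'work_dirs/latest.pth')
for data in data_loader:
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
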
Example #9
def main():
    args = parse_args()
    config_file = './cascade_rcnn_r50_rfp_sac_iou_ls_alldata-v3_e15.py'
    checkpoint_file = 'epoch_15.pth'
    device = 'cuda:0'
    cfg = Config.fromfile(config_file)
    # build model
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    checkpoint = load_checkpoint(model, checkpoint_file, map_location=device)
    with open(cfg.data.test.ann_file) as f:
        test_json_raw = json.load(f)
    # map file_name -> image id
    name2imgid = {}
    for imageinfo in test_json_raw['images']:
        name2imgid[imageinfo['file_name']] = imageinfo['id']
    wrap_fp16_model(model)  # use fp16 to speed up inference
    # model = fuse_conv_bn(model)  # errors out when enabled
    # build the dataloader
    samples_per_gpu = cfg.data.test.pop(
        'samples_per_gpu', 1)  # aug_test does not support batch_size > 1
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=samples_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=False,
                                   shuffle=False)
    model = MMDataParallel(model, device_ids=[0])  # required: it errors without the wrapper
    model.eval()
    json_results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        batch_size = len(result)
        result = result[0]  # only one image is fed per batch
        img_metas = data['img_metas'][0].data[0]
        # print(result)
        # predict = adaptive_inference_detector(model, image)
        # basename = img_metas[0]['ori_filename']
        # image = cv2.imread(os.path.join(cfg.data.test.img_prefix, basename))
        for cls_idx, bboxes in enumerate(result):
            if len(bboxes) > 0:
                for bbox in bboxes:
                    x1, y1, x2, y2, score = bbox.tolist()
                    if score >= 0.001:
                        det = dict()
                        det['image_id'] = name2imgid[img_metas[0]
                                                     ['ori_filename']]
                        det['bbox'] = [x1, y1, x2 - x1, y2 - y1]
                        det['score'] = float(score)
                        det['category_id'] = cls_idx + 1
                        json_results.append(det)
        for _ in range(batch_size):
            prog_bar.update()
    mmcv.dump(json_results, args.jsonfile)
Example #10
def test_cluster_det(model, cfg, logger):
    if cfg.load_from:
        load_checkpoint(model, cfg.load_from)

    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.test_data)
    processor = build_processor(cfg.stage)

    losses = []
    output_probs = []

    if cfg.gpus == 1:
        data_loader = build_dataloader(dataset,
                                       processor,
                                       cfg.batch_size_per_gpu,
                                       cfg.workers_per_gpu,
                                       train=False)

        model = MMDataParallel(model, device_ids=range(cfg.gpus))
        if cfg.cuda:
            model.cuda()

        model.eval()
        for i, data in enumerate(data_loader):
            with torch.no_grad():
                output, loss = model(data, return_loss=True)
                losses += [loss.item()]
                if i % cfg.log_config.interval == 0:
                    if dataset.ignore_meta:
                        logger.info('[Test] Iter {}/{}'.format(
                            i, len(data_loader)))
                    else:
                        logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                            i, len(data_loader), loss))
                if cfg.save_output:
                    output = output.view(-1)
                    prob = output.data.cpu().numpy()
                    output_probs.append(prob)
    else:
        raise NotImplementedError

    if not dataset.ignore_meta:
        avg_loss = sum(losses) / len(losses)
        logger.info('[Test] Overall Loss {:.4f}'.format(avg_loss))

    if cfg.save_output:
        fn = os.path.basename(cfg.load_from)
        opath = os.path.join(cfg.work_dir, fn[:fn.rfind('.pth')] + '.npz')
        meta = {
            'tot_inst_num': dataset.inst_num,
            'proposal_folders': cfg.test_data.proposal_folders,
        }
        print('dump output to {}'.format(opath))
        output_probs = np.concatenate(output_probs).ravel()
        np.savez_compressed(opath, data=output_probs, meta=meta)
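
The dump written above can be read back as follows (a sketch; the path is a placeholder, and `allow_pickle=True` is needed because `meta` is a Python dict stored by `np.savez_compressed`):

import numpy as np

dump = np.load('work_dir/checkpoint.npz', allow_pickle=True)
probs = dump['data']
meta = dump['meta'].item()  # the dict is stored as a 0-d object array
print(probs.shape, meta['tot_inst_num'])
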
Example #11
def test(model, dataset, cfg, logger):
    if cfg.load_from:
        print('load from {}'.format(cfg.load_from))
        load_checkpoint(model, cfg.load_from, strict=True, logger=logger)

    losses = []
    edges = []
    scores = []

    if cfg.gpus == 1:
        data_loader = build_dataloader(dataset,
                                       cfg.batch_size_per_gpu,
                                       cfg.workers_per_gpu,
                                       train=False)

        model = MMDataParallel(model, device_ids=range(cfg.gpus))
        if cfg.cuda:
            model.cuda()

        model.eval()
        for i, (data, cid, node_list) in enumerate(data_loader):
            with torch.no_grad():
                _, _, h1id, gtmat = data
                pred, loss = model(data, return_loss=True)
                losses += [loss.item()]
                pred = F.softmax(pred, dim=1)
                if i % cfg.log_config.interval == 0:
                    if dataset.ignore_label:
                        logger.info('[Test] Iter {}/{}'.format(
                            i, len(data_loader)))
                    else:
                        acc, p, r = online_evaluate(gtmat, pred)
                        logger.info(
                            '[Test] Iter {}/{}: Loss {:.4f}, '
                            'Accuracy {:.4f}, Precision {:.4f}, Recall {:.4f}'.
                            format(i, len(data_loader), loss, acc, p, r))

                node_list = node_list.numpy()
                bs = len(cid)
                h1id_num = len(h1id[0])
                for b in range(bs):
                    cidb = cid[b].int().item()
                    nlst = node_list[b]
                    center_idx = nlst[cidb]
                    for j, n in enumerate(h1id[b]):
                        edges.append([center_idx, nlst[n.item()]])
                        scores.append(pred[b * h1id_num + j, 1].item())
    else:
        raise NotImplementedError

    if not dataset.ignore_label:
        avg_loss = sum(losses) / len(losses)
        logger.info('[Test] Overall Loss {:.4f}'.format(avg_loss))

    return np.array(edges), np.array(scores), len(dataset)
Example #12
def init_model():
    config = './configs/htc/htc_hrnetv2p_w48_20e_kaggle_pku_no_semantic_translation_wudi_car_insurance.py'
    checkpoint_path = '/data/Kaggle/checkpoints/all_cwxe99_3070100flip05resumme93Dec29-16-28-48/epoch_100.pth'

    cfg = mmcv.Config.fromfile(config)
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    checkpoint = load_checkpoint(model, checkpoint_path, map_location='cpu')
    model.CLASSES = checkpoint['meta']['CLASSES']

    model = MMDataParallel(model, device_ids=[0])
    model.eval()
    return model, cfg
Example #13
def _non_dist_test(model, query_set, gallery_set, cfg, validate=False):
    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    query_embeds = _process_embeds(query_set, model, cfg)
    gallery_embeds = _process_embeds(gallery_set, model, cfg)

    query_embeds_np = np.array(query_embeds)
    gallery_embeds_np = np.array(gallery_embeds)

    e = Evaluator(cfg.data.query.id_file,
                  cfg.data.gallery.id_file,
                  extract_feature=cfg.extract_feature)
    e.evaluate(query_embeds_np, gallery_embeds_np)
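
`_process_embeds` is called here and in several later snippets but never shown; a minimal sketch of what it might look like, mirroring the `build_dataloader` arguments used elsewhere in these examples:

def _process_embeds(dataset, model, cfg):
    data_loader = build_dataloader(dataset,
                                   cfg.data.imgs_per_gpu,
                                   cfg.data.workers_per_gpu,
                                   len(cfg.gpus.test),
                                   dist=False,
                                   shuffle=False)
    embeds = []
    with torch.no_grad():
        for batch in data_loader:
            embed = model(batch['img'], return_loss=False)
            # keep results on the CPU so GPU memory stays bounded
            embeds.extend(embed.data.cpu().numpy())
    return embeds
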
Example #14
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=2):
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    #model = MMDataParallel(model)
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    total_time = 0
    for data, label in data_loader:
        with torch.no_grad():
            start = time.time()
            output = model(data).data.cpu().numpy()

            if torch.cuda.is_available():
                torch.cuda.synchronize()

            t = time.time() - start
            total_time += t

        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    #macs, params = get_model_complexity_info(model.cuda(), (3, 300, 18, 2), as_strings=True,
    #                                              print_per_layer_stat=True, verbose=True)
    #print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    #print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    print("Average infer time: ", total_time / len(data_loader))
    print("Total infer time: ", total_time)
    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
Example #15
def extract_features(image_set, cfg, save_feature_dir):

    model = build_retriever(cfg.model)
    print('model built')
    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    embeds = _process_embeds(image_set, model, cfg)

    if not os.path.exists(save_feature_dir):
        os.makedirs(save_feature_dir)
    save_path = os.path.join(save_feature_dir, 'extracted_features.mat')

    sio.savemat(save_path, {'embeds': embeds})
    print('extracted features saved to : %s' % save_path)
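
The saved .mat file round-trips with scipy; a quick check (the key matches the `savemat` call above):

import scipy.io as sio

mat = sio.loadmat('extracted_features.mat')
embeds = mat['embeds']
print(embeds.shape)
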
Example #16
def _non_dist_test(model, dataset, cfg, validate=False):
    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    embeds = _process_embeds(dataset, model, cfg)

    metric = model.module.triplet_net.metric_branch

    # compatibility auc
    auc = dataset.test_compatibility(embeds, metric)

    # fill-in-blank accuracy
    acc = dataset.test_fitb(embeds, metric)

    print('Compat AUC: {:.2f} FITB: {:.1f}\n'.format(round(auc, 2),
                                                     round(acc * 100, 1)))
Example #17
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    # cnt = 0
    # confusion matrix accumulator
    conf_matrix = torch.zeros(model_cfg.num_class, model_cfg.num_class)
    set_determined_seed(seed)
    torch.multiprocessing.set_sharing_strategy('file_system')
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=get_gpus(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    for data, label in data_loader:
        with torch.no_grad():
            # cnt += 1
            # print("\n"+str(cnt))
            # torch.cuda.empty_cache()
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    # accumulate and save the confusion matrix
    conf_matrix = confusion_matrix(
        torch.max(torch.from_numpy(results), 1)[1], labels, conf_matrix)
    np.save('/home/computer/WBH/GCN/INTERGCN/conf.npy', conf_matrix)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
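
`confusion_matrix` above takes predictions, ground-truth labels, and an accumulator; a sketch of a compatible helper (rows as ground truth, columns as predictions, is an assumption):

def confusion_matrix(preds, labels, conf_matrix):
    # Accumulate one count per (true, predicted) pair.
    for p, t in zip(preds, labels):
        conf_matrix[int(t), int(p)] += 1
    return conf_matrix
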
Example #18
def detect(inputs,
           results,
           model_cfg,
           dataset_cfg,
           checkpoint,
           video_dir,
           batch_size=64,
           gpus=1,
           workers=4):
    print('detect start')
    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    video_file_list = os.listdir(video_dir)
    prog_bar = ProgressBar(len(video_file_list))
    for video_file in video_file_list:
        data = inputs.get()
        data_loader = data_parse(data, dataset_cfg.pipeline,
                                 dataset_cfg.data_source.num_track)
        data, label = data_loader
        with torch.no_grad():
            data = torch.from_numpy(data)
            # add a leading batch_size dimension
            data = data.unsqueeze(0)
            data = data.float().to("cuda:0").detach()
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(torch.tensor([label]))
        for i in range(len(data)):
            prog_bar.update()
    print('--------', results, labels, '--------------')
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
Example #19
def main():
    args = parse_args()

    model = init_detector(args.config, args.checkpoint)
    cfg = model.cfg
    assert getattr(detectors, cfg.model['type']) is \
        detectors.SingleStageDetector
    model = MMDataParallel(model, device_ids=[0])

    batch = torch.FloatTensor(1, 3, cfg.input_size, cfg.input_size).cuda()
    input_shape = (cfg.input_size, cfg.input_size, 3)
    scale = np.array([1, 1, 1, 1], dtype=np.float32)
    data = dict(img=batch,
                img_meta=[{
                    'img_shape': input_shape,
                    'scale_factor': scale
                }])
    model.eval()
    model.module.onnx_export(export_name=args.output, **data)
    print("export end")
Example #20
def train_flownet(model,
                  dataset,
                  cfg,
                  distributed=False,
                  validate=False,
                  logger=None):
    if logger is None:
        logger = get_root_logger(cfg.log_level)

    # start training
    # prepare data loaders
    data_loaders = [
        build_dataloader(dataset,
                         cfg.data.imgs_per_gpu,
                         cfg.data.workers_per_gpu,
                         cfg.gpus,
                         dist=False)
    ]

    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()

    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)

    # if cfg.resume_from:
    #     runner.resume(cfg.resume_from)
    # elif cfg.load_from:
    #     runner.load_checkpoint(cfg.load_from)
    model.eval()
    for param in model.parameters():
        param.requires_grad = False
    # model.load_flow()
    model.module.flow_head.train()
    for param in model.module.flow_head.parameters():
        param.requires_grad = True
    # training
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
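
The freeze-everything-then-unfreeze-the-head pattern above is common enough to wrap in a helper; a sketch:

def set_requires_grad(module, flag):
    # Enable or disable gradients for every parameter of a module.
    for param in module.parameters():
        param.requires_grad = flag

# equivalent to the inline loops above:
# set_requires_grad(model, False)
# set_requires_grad(model.module.flow_head, True)
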
Example #21
def _non_dist_test_cate_attr(model, dataset, cfg, validate=False):
    data_loader = build_dataloader(dataset,
                                   cfg.data.imgs_per_gpu,
                                   cfg.data.workers_per_gpu,
                                   len(cfg.gpus.test),
                                   dist=False,
                                   shuffle=False)

    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    attr_calculator = AttrCalculator(
        cfg,
        topns=[3, 5],
        show_attr_name=True,
        attr_name_file=cfg.data.test['attr_cloth_file'])
    cate_calculator = CateCalculator(cfg, topns=[1, 3, 5])

    for batch_idx, testdata in enumerate(data_loader):
        imgs = testdata['img']
        landmark = testdata['landmark']
        attr = testdata['attr']
        cate = testdata['cate']

        attr_pred, cate_pred = model(imgs,
                                     attr,
                                     landmark=landmark,
                                     return_loss=False)

        attr_calculator.collect_result(attr_pred, attr)
        cate_calculator.collect_result(cate_pred, cate)

        if batch_idx % cfg.print_interval == 0:
            attr_calculator.show_result(batch_idx)
            cate_calculator.show_result(batch_idx)

    attr_calculator.show_result()
    attr_calculator.show_per_attr_result()
    cate_calculator.show_result()
Example #22
def main():
    args = parse_args()

    model = init_detector(args.config, args.checkpoint)
    cfg = model.cfg
    assert getattr(detectors, cfg.model['type']) is detectors.SingleStageDetector
    model = MMDataParallel(model, device_ids=[0])

    batch = torch.FloatTensor(1, 3, cfg.input_size, cfg.input_size).cuda()
    input_shape = (cfg.input_size, cfg.input_size, 3)
    scale = np.array([1, 1, 1, 1], dtype=np.float32)
    data = dict(img=batch, img_meta=[{'img_shape': input_shape, 'scale_factor': scale}])

    model.eval()
    # rebind export-friendly implementations onto the model via __get__
    model.module.onnx_export = onnx_export.__get__(model.module)
    model.module.forward = forward.__get__(model.module)
    model.module.forward_export = forward_export_detector.__get__(model.module)
    model.module.bbox_head.export_forward = \
        export_forward_ssd_head.__get__(model.module.bbox_head)
    model.module.bbox_head._prepare_cls_scores_bbox_preds = \
        prepare_cls_scores_bbox_preds_ssd_head.__get__(model.module.bbox_head)
    model.module.bbox_head.get_bboxes = \
        get_bboxes_ssd_head.__get__(model.module.bbox_head)
    model.module.onnx_export(export_name=args.output, **data)
Example #23
def main():

    args = parse_args()

    # === config ===
    cfg = Config.fromfile(args.config)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    loader = build_dataloader(
        dataset,
        1,  # one sample per GPU at test time
        max(1, cfg.data.workers_per_gpu // cfg.data.tasks_per_gpu),
        args.gpus,
        dist=False,
        customized_sampler=False,
        shuffle=False)
    model = build_model(cfg.model,
                        train_cfg=cfg.train_cfg,
                        test_cfg=cfg.test_cfg)
    model = MMDataParallel(model, device_ids=range(args.gpus)).cuda()
    load_checkpoint(model, args.checkpoint)
    model.eval()

    results = []
    prog_bar = mmcv.ProgressBar(len(loader))
    for data in loader:
        with torch.no_grad():
            result = model(return_loss=False, **data)
        results.append(result)
        prog_bar.update()

    topk = (1, 3, 5, 10) if not args.topk else args.topk
    evaluate(results, eval=args.eval, topk=topk)
Example #24
    def setup(self, config_file, checkpoint_file, fuse_conv):
        cfg = Config.fromfile(config_file)
        # set cudnn_benchmark
        if cfg.get('cudnn_benchmark', False):
            torch.backends.cudnn.benchmark = True
        cfg.model.pretrained = None
        cfg.data.test.test_mode = True

        # build the dataloader
        # TODO: support multiple images per gpu (only minor changes are needed)
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False)

        # build the model and load checkpoint
        model = build_detector(cfg.model,
                               train_cfg=None,
                               test_cfg=cfg.test_cfg)
        fp16_cfg = cfg.get('fp16', None)
        if fp16_cfg is not None:
            wrap_fp16_model(model)
        load_checkpoint(model, checkpoint_file, map_location='cpu')
        if fuse_conv:
            model = fuse_module(model)

        self._fuse_conv = fuse_conv

        model = MMDataParallel(model, device_ids=[0])

        model.eval()

        return model, data_loader, dataset
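
A hypothetical call of the `setup` method; the instance and file names are placeholders:

model, data_loader, dataset = benchmark.setup(
    'configs/my_detector.py',       # hypothetical config
    'checkpoints/my_detector.pth',  # hypothetical checkpoint
    fuse_conv=False)
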
Example #25
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # import modules from plguin/xx, registry will be updated
    if hasattr(cfg, 'plugin') & cfg.plugin:
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            plugin_dir = cfg.plugin_dir
            _module_dir = os.path.dirname(plugin_dir)
            _module_path = _module_dir.replace('/', '.')
            print(_module_path)
            plg_lib = importlib.import_module(_module_path)
        else:
            # import dir is the dirpath for the config file
            _module_dir = os.path.dirname(args.config)
            _module_path = _module_dir.replace('/', '.')
            print(_module_path)
            plg_lib = importlib.import_module(_module_path)

        
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    cfg.model.pretrained = None
    # in case the test dataset is concatenated
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        samples_per_gpu = max(
            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    distributed = False
    
    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)
    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    #from IPython import embed
    #embed()
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    
    model = MMDataParallel(model, device_ids=[0])
    
    model.eval()
    output_list = []
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            data = scatter(data, [-1])[0]
            for k, v in data.items():
                if isinstance(v, torch.Tensor):
                    data[k] = v.cuda()
            outputs = model.module.eval_forward(data)
            output_list.append(outputs)
            if i >= 100:
                break
    
    merged_output_list = []
    for i, output in enumerate(output_list):
        save_dir = os.path.join(args.out_dir, 'sample-{}'.format(i))
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        outputs = parse_output(output, save_dir)
        merged_output_list.append(outputs)
    
    save_dir = os.path.join(args.out_dir, 'gifs')
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)
    merge_output(merged_output_list, save_dir)
Example #26
def test_cluster_mall(model1, cfg, logger):
    model = torch.load(cfg.load_from1)

    for k, v in cfg.model1['kwargs'].items():
        setattr(cfg.test_data, k, v)
    for k, v in cfg.model2['kwargs'].items():
        setattr(cfg.test_data, k, v)
    setattr(cfg.test_data, 'phase', 'test')
    dataset = build_dataset_mall(cfg.test_data)
    processor = build_processor(cfg.stage)

    losses = []
    output_probs = []
    IoP_GT = []
    IoP_binary_GT = []
    num_impure_pro = 0
    if cfg.gpus == 1:
        data_loader = build_dataloader(dataset,
                                       processor,
                                       cfg.batch_size_per_gpu,
                                       cfg.workers_per_gpu,
                                       train=False)

        model = MMDataParallel(model, device_ids=range(cfg.gpus))
        if cfg.cuda:
            model.cuda()
        output_IoP_loss = []
        model.eval()
        for i, data in enumerate(data_loader):
            with torch.no_grad():
                output, loss = model(data, return_loss=True)
                losses += [loss.item()]
                num_impure_pro += (data[-1] == 0).nonzero().shape[0]
                if i % cfg.log_config.interval == 0:
                    logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                        i, len(data_loader), loss))
                if cfg.save_output:
                    output = output[:, 1]
                    output = output.view(-1)
                    output_probs.append(output.tolist())
                    IoP_GT.append(data[-1].tolist())
    else:
        raise NotImplementedError
    # flatten the per-batch lists
    output_probs1 = [iop for item in output_probs for iop in item]
    IoP_GT0 = [iop for item in IoP_GT for iop in item]
    output_probs = torch.from_numpy(np.array(output_probs1))
    IoP_GT = np.array(IoP_GT0)
    IoP_GT1 = torch.from_numpy(IoP_GT)
    #HistgramStd.eval_batch_new(output_probs, IoP_GT1, 'BCE')
    output_probs2 = np.array(output_probs1)
    # plot roc curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        IoP_GT, output_probs2)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.title('ROC')
    plt.plot(false_positive_rate,
             true_positive_rate,
             'b',
             label='AUC = %0.4f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('TPR')
    plt.xlabel('FPR')
    plt.draw()
    plt.savefig(cfg.work_dir + '/ROC.jpg')
    plt.close()

    # plot IoP distribution curve
    pos01 = np.where((IoP_GT1 == 0))
    iop_01 = output_probs2[pos01]
    pos02 = np.where((IoP_GT1 == 1))
    iop_02 = output_probs2[pos02]
    if cfg.save_output:
        plt.figure(1)
        plt.subplot(1, 1, 1)
        plt.boxplot([iop_01.tolist(), iop_02.tolist()], notch=True)

        x_ticks = np.array([1, 2])
        plt.xticks(x_ticks)
        plt.grid(axis='y')
        plt.draw()
        plt.savefig(cfg.work_dir + '/Estimated_IoP.jpg')
        plt.close()

    estimated_iop_dict = {}
    for i, node in enumerate(dataset.lst):
        node_name = node.split('/')[-1]
        estimated_iop = output_probs1[i]
        estimated_iop_dict[node_name] = estimated_iop
    with open(cfg.work_dir + '/Estimated_IoP_eval_dict.json', 'w') as f:
        json.dump(estimated_iop_dict, f)
    with open(cfg.work_dir + '/Estimated_IoP_eval.json', 'w') as f:
        json.dump(output_probs1, f)
    with open(cfg.work_dir + '/GT_IoP_eval.json', 'w') as f:
        json.dump(IoP_GT0, f)
Example #27
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' to 'DefaultFormatBundle'
        cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=False,
                                   shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 images and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')

        if (i + 1) == 2000:
            pure_inf_time += elapsed
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break
Example #28
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    if args.ckpt:
        cfg.resume_from = args.ckpt

    cfg.test_cfg.rcnn.score_thr = 0.5

    FOCAL_LENGTH = cfg.get('FOCAL_LENGTH', 1000)

    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=('Human', ))
    # add an attribute for visualization convenience
    model.CLASSES = ('Human', )

    model = MMDataParallel(model, device_ids=[0]).cuda()

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    runner = Runner(model, lambda x: x, optimizer, cfg.work_dir, cfg.log_level)
    runner.resume(cfg.resume_from)
    model = runner.model
    model.eval()
    # necessary for headless rendering
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
    render = Renderer(focal_length=FOCAL_LENGTH)
    img_transform = ImageTransform(size_divisor=32, **img_norm_cfg)
    img_scale = cfg.common_val_cfg.img_scale

    with torch.no_grad():
        folder_name = args.image_folder
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)
        images = os.listdir(folder_name)
        for image in images:
            file_name = osp.join(folder_name, image)
            img = cv2.imread(file_name)
            ori_shape = img.shape

            img, img_shape, pad_shape, scale_factor = img_transform(
                img, img_scale)

            # Force padding for the issue of multi-GPU training
            padded_img = np.zeros((img.shape[0], img_scale[1], img_scale[0]),
                                  dtype=img.dtype)
            padded_img[:, :img.shape[-2], :img.shape[-1]] = img
            img = padded_img

            assert img.shape[1] == 512 and img.shape[2] == 832, \
                'Image shape incorrect'

            data_batch = dict(
                img=DC([to_tensor(img[None, ...])], stack=True),
                img_meta=DC([{
                    'img_shape': img_shape,
                    'scale_factor': scale_factor,
                    'flip': False,
                    'ori_shape': ori_shape
                }],
                            cpu_only=True),
            )
            bbox_results, pred_results = model(**data_batch, return_loss=False)

            if pred_results is not None:
                pred_results['bboxes'] = bbox_results[0]
                img = denormalize(img)
                img_viz = prepare_dump(pred_results, img, render, bbox_results,
                                       FOCAL_LENGTH)
                cv2.imwrite(
                    f'{file_name.replace(folder_name, output_folder)}.output.jpg',
                    img_viz[:, :, ::-1])
Example #29
def main():
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpuid

    img_dir = args.img_dir
    out_dir = args.out_dir
    batch_size = args.batch_size

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    if args.img_dir != '':
        file_list = common.load_filepaths(args.img_dir,
                                          suffix=('.jpg', '.png', '.jpeg'),
                                          recursive=True)
    elif args.img_list != '':
        file_list = parse_testfile(args.img_list)
    else:
        raise "Both img_dir and img_list is empty."

    dataset = FilesDataset(file_list, cfg.test_pipeline)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=batch_size,
                                   workers_per_gpu=batch_size,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')

    model = reweight_cls(model, args.tau).cuda()

    model = MMDataParallel(model, device_ids=[0])

    model.eval()
    count = 0
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            # bbox_results, segm_results
            results = model(return_loss=False, rescale=True, **data)

        # batch
        #for result  in results:
        #    file_path = file_list[count]
        #    save_name = file_path.replace('/home/songbai.xb/workspace/projects/TAO/data/TAO/frames/val/', '')
        #    save_path = os.path.join(out_dir, save_name)
        #    common.makedirs(os.path.dirname(save_path))
        #    save_in_tao_format(result, save_path)
        #    count += 1
        file_path = file_list[i]
        save_name = file_path.replace(
            '/home/songbai.xb/workspace/projects/TAO/data/TAO/frames/val/', '')
        save_name = save_name.replace('.jpg', '.pkl')
        save_path = os.path.join(out_dir, save_name)
        common.makedirs(os.path.dirname(save_path))
        save_in_tao_format(results[0], save_path)
Example #30
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):

    model = call_obj(**model_cfg)
    edge = model.graph.edge
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    sys.path.append('{}/{}/build/python'.format(os.getcwd(), "openpose"))

    try:
        from openpose import pyopenpose as op
    except ImportError:
        print('Cannot find the OpenPose Python API.')
        return

    params = dict(model_folder='openpose/models', model_pose='COCO')
    params["hand"] = True

    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    # video_capture = cv2.VideoCapture("mmskeleton/deprecated/st_gcn/resource/media/clean_and_jerk.mp4")
    video_capture = cv2.VideoCapture("fall01.mp4")
    pose_tracker = naive_pose_tracker()
    # start recognition
    start_time = time.time()
    frame_index = 0
    gt_labels = []
    with open(
            'mmskeleton/deprecated/st_gcn/resource/kinetics_skeleton/label_name.txt',
            'r') as f:
        for line in f:
            gt_labels.append(line.strip('\n'))

    while True:
        tic = time.time()

        # get image
        ret, orig_image = video_capture.read()
        # orig_image = cv2.imread("3.jpg")
        if orig_image is None:
            break
        source_H, source_W, _ = orig_image.shape
        # orig_image = cv2.resize(
        #     orig_image, (256 * source_W // source_H, 256))
        H, W, _ = orig_image.shape

        # pose estimation

        datum = op.Datum()
        datum.cvInputData = orig_image
        opWrapper.emplaceAndPop([datum])

        multi_pose = datum.poseKeypoints  # (num_person, num_joint, 3)

        # orig_image = cv2.resize(orig_image, (768, 1024))
        # cv2.imshow("orig_image-GCN", orig_image)
        # cv2.waitKey(0)

        # skip frames where no (num_person, num_joint, 3) keypoints came back
        if multi_pose is None or len(multi_pose.shape) != 3:
            continue

        # normalization
        multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
        multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
        multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
        multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
        multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

        # pose tracking
        # if self.arg.video == 'camera_source':
        #     frame_index = int((time.time() - start_time) * self.arg.fps)
        # else:
        #     frame_index += 1
        frame_index += 1
        pose_tracker.update(multi_pose, frame_index)
        data_numpy = pose_tracker.get_skeleton_sequence()

        data = torch.from_numpy(data_numpy)

        data = data.unsqueeze(0)
        data = data.float().to("cuda:0").detach()
        with open("de.txt", 'w+') as f:
            for i in data[0][0]:
                f.write(str(i) + '\n\n')
        # break
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        voting_label = int(output.argmax(axis=1))

        print('voting_label_index:{}'.format(voting_label))
        print(len(gt_labels))
        print(gt_labels[voting_label])
        print(output[0][voting_label])
        app_fps = 1 / (time.time() - tic)
        image = render(edge, data_numpy, gt_labels[voting_label],
                       [[gt_labels[voting_label]]], None, orig_image, app_fps)
        cv2.imshow("ST-GCN", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break