def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    for data, label in data_loader:
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
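# Note: topk_accuracy is used by every test() example in this listing but is not
# defined here. A minimal sketch of the helper as it commonly appears in
# mmskeleton/st-gcn style code (an assumption, not taken from this listing):
def topk_accuracy(score, label, k=1):
    """Fraction of samples whose ground-truth label is among the top-k scores."""
    rank = score.argsort()  # per-sample class indices sorted by ascending score
    hit_top_k = [l in rank[i, -k:] for i, l in enumerate(label)]
    return sum(hit_top_k) * 1.0 / len(hit_top_k)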
Example No. 2
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=2):
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    #model = MMDataParallel(model)
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    total_time = 0
    for data, label in data_loader:
        with torch.no_grad():
            start = time.time()
            output = model(data).data.cpu().numpy()

            if torch.cuda.is_available():
                # .cpu() above already forces a device sync; this is kept as a safeguard.
                torch.cuda.synchronize()

            t = time.time() - start
            total_time += t

        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    #macs, params = get_model_complexity_info(model.cuda(), (3, 300, 18, 2), as_strings=True,
    #                                              print_per_layer_stat=True, verbose=True)
    #print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    #print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    print("Average infer time: ", total_time / len(data_loader))
    print("Total infer time: ", total_time)
    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
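# The loop above times model(data).cpu(), relying on .cpu() to synchronize the
# GPU. An illustrative alternative (not part of the original example) that makes
# the synchronization explicit and keeps the host transfer out of the measurement:
def time_forward(model, data):
    """Time a single forward pass; returns (output, seconds)."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # make sure previously queued work is done
    start = time.time()
    with torch.no_grad():
        output = model(data)
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # wait for the forward pass itself
    return output, time.time() - start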
Example No. 3
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    #cnt = 0
    #confusion
    conf_matrix = torch.zeros(model_cfg.num_class, model_cfg.num_class)
    #confusion
    set_determined_seed(seed)  # note: `seed` is expected to be defined at module level
    torch.multiprocessing.set_sharing_strategy('file_system')
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=get_gpus(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    for data, label in data_loader:
        with torch.no_grad():
            # cnt += 1
            # print("\n"+str(cnt))
            # torch.cuda.empty_cache()
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    #confusion
    conf_matrix = confusion_matrix(
        torch.max(torch.from_numpy(results), 1)[1], labels, conf_matrix)
    # note: the output path below is machine-specific
    np.save('/home/computer/WBH/GCN/INTERGCN/conf.npy', conf_matrix)
    #confusion

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
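# The confusion_matrix helper used above is not defined in this listing. One
# plausible implementation, assuming integer predictions and labels (a sketch,
# not the original code):
def confusion_matrix(preds, labels, conf_matrix):
    """Accumulate (label, prediction) pairs into a square confusion matrix."""
    for p, t in zip(preds, labels):
        conf_matrix[t, p] += 1
    return conf_matrix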
Example No. 4
def detect(inputs,
           results,
           model_cfg,
           dataset_cfg,
           checkpoint,
           video_dir,
           batch_size=64,
           gpus=1,
           workers=4):
    print('detect start')
    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []  # note: this rebinds the `results` argument passed to detect()
    labels = []
    video_file_list = os.listdir(video_dir)
    prog_bar = ProgressBar(len(video_file_list))
    for video_file in video_file_list:
        data = inputs.get()
        data_loader = data_parse(data, dataset_cfg.pipeline,
                                 dataset_cfg.data_source.num_track)
        data, label = data_loader
        with torch.no_grad():
            data = torch.from_numpy(data)
            # add a leading dimension for batch_size
            data = data.unsqueeze(0)
            data = data.float().to("cuda:0").detach()
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(torch.tensor([label]))
        for i in range(len(data)):
            prog_bar.update()
    print('--------', results, labels, '--------------')
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
Example No. 5
def init_twodimestimator(config, checkpoint=None, device='cpu'):
    if isinstance(config, str):
        config = Config.fromfile(config)
        config = config.processor_cfg
    elif isinstance(config, OrderedDict):
        pass
    else:
        raise ValueError(
            'Input config type is: {}, expect "str" or "OrderedDict"'.format(
                type(config)))
    model_cfg = config.model_cfg

    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location=device)
    model.to(device)
    model = model.eval()

    return model
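# Hypothetical usage of init_twodimestimator; the config and checkpoint paths are
# placeholders for illustration and do not come from this listing:
estimator = init_twodimestimator('configs/pose_estimation/hrnet.yaml',
                                 checkpoint='checkpoints/pose_hrnet.pth',
                                 device='cuda:0')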
Example No. 6
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):

    model = call_obj(**model_cfg)
    edge = model.graph.edge
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    sys.path.append('{}/{}/build/python'.format(os.getcwd(), "openpose"))

    try:
        from openpose import pyopenpose as op
    except ImportError:
        print('Cannot find the OpenPose Python API.')
        return

    params = dict(model_folder='openpose/models', model_pose='COCO')
    params["hand"] = True

    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    # video_capture = cv2.VideoCapture("mmskeleton/deprecated/st_gcn/resource/media/clean_and_jerk.mp4")
    video_capture = cv2.VideoCapture("fall01.mp4")
    pose_tracker = naive_pose_tracker()
    # start recognition
    start_time = time.time()
    frame_index = 0
    gt_labels = []
    with open(
            'mmskeleton/deprecated/st_gcn/resource/kinetics_skeleton/label_name.txt',
            'r') as f:
        for line in f:
            gt_labels.append(line.strip('\n'))

    while True:
        tic = time.time()

        # get image
        ret, orig_image = video_capture.read()
        # orig_image = cv2.imread("3.jpg")
        if orig_image is None:
            break
        source_H, source_W, _ = orig_image.shape
        # orig_image = cv2.resize(
        #     orig_image, (256 * source_W // source_H, 256))
        H, W, _ = orig_image.shape

        # pose estimation

        datum = op.Datum()
        datum.cvInputData = orig_image
        opWrapper.emplaceAndPop([datum])

        multi_pose = datum.poseKeypoints  # (num_person, num_joint, 3)

        # orig_image = cv2.resize(orig_image, (768, 1024))
        # cv2.imshow("orig_image-GCN", orig_image)
        # cv2.waitKey(0)

        if len(multi_pose.shape) != 3:
            continue

        # normalization
        multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
        multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
        multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
        multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
        multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

        # pose tracking
        # if self.arg.video == 'camera_source':
        #     frame_index = int((time.time() - start_time) * self.arg.fps)
        # else:
        #     frame_index += 1
        frame_index += 1
        pose_tracker.update(multi_pose, frame_index)
        data_numpy = pose_tracker.get_skeleton_sequence()

        data = torch.from_numpy(data_numpy)

        data = data.unsqueeze(0)
        data = data.float().to("cuda:0").detach()
        with open("de.txt", 'w+') as f:
            for i in data[0][0]:
                f.write(str(i) + '\n\n')
        # break
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        voting_label = int(output.argmax(axis=1))

        print('voting_label_index:{}'.format(voting_label))
        print(len(gt_labels))
        print(gt_labels[voting_label])
        print(output[0][voting_label])
        app_fps = 1 / (time.time() - tic)
        image = render(edge, data_numpy, gt_labels[voting_label],
                       [[gt_labels[voting_label]]], None, orig_image, app_fps)
        cv2.imshow("ST-GCN", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def init_recognizer(recognition_cfg, device):
    model = call_obj(**(recognition_cfg.model_cfg))
    load_checkpoint(model,
                    recognition_cfg.checkpoint_file,
                    map_location=device)
    return model
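# init_recognizer, unlike init_twodimestimator above, neither moves the model to a
# device nor switches it to eval mode. A thin wrapper a caller might add before
# inference (an illustrative sketch, not part of the original listing):
def init_recognizer_for_inference(recognition_cfg, device):
    """Build the recognizer and prepare it for inference on `device`."""
    model = init_recognizer(recognition_cfg, device)
    return model.to(device).eval()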
Example No. 8
def realtime_detect(detection_cfg,
                    estimation_cfg,
                    model_cfg,
                    dataset_cfg,
                    tracker_cfg,
                    video_dir,
                    category_annotation,
                    checkpoint,
                    batch_size=64,
                    gpus=1,
                    workers=4):
    """
        初始化
    """
    # 初始化模型
    pose_estimators = init_pose_estimator(detection_cfg,
                                          estimation_cfg,
                                          device=0)
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    # read frames from the video
    video_file = 'train/clean/clean10.avi'
    reader = mmcv.VideoReader(os.path.join(video_dir, video_file))
    video_frames = reader[:10000]

    if category_annotation is None:
        video_categories = dict()
    else:
        with open(category_annotation) as f:
            json_file = json.load(f)
            video_categories = json_file['annotations']
            action_class = json_file['categories']
    annotations = []
    num_keypoints = -1
    for i, image in enumerate(video_frames):
        res = inference_pose_estimator(pose_estimators, image)
        res['frame_index'] = i
        if not res['has_return']:
            continue
        num_person = len(res['joint_preds'])
        assert len(res['person_bbox']) == num_person

        for j in range(num_person):
            keypoints = [[p[0], p[1], round(s[0], 2)] for p, s in zip(
                res['joint_preds'][j].round().astype(int).tolist(),
                res['joint_scores'][j].tolist())]
            num_keypoints = len(keypoints)
            person_info = dict(
                person_bbox=res['person_bbox'][j].round().astype(int).tolist(),
                frame_index=res['frame_index'],
                id=j,
                person_id=None,
                keypoints=keypoints)
            annotations.append(person_info)
        category_id = video_categories[video_file][
            'category_id'] if video_file in video_categories else -1
        info = dict(video_name=video_file,
                    resolution=reader.resolution,
                    num_frame=len(video_frames),
                    num_keypoints=num_keypoints,
                    keypoint_channels=['x', 'y', 'score'],
                    version='1.0')
        video_info = dict(info=info,
                          category_id=category_id,
                          annotations=annotations)

        data_loader = data_parse(video_info, dataset_cfg.pipeline,
                                 dataset_cfg.data_source.num_track)
        data, label = data_loader
        with torch.no_grad():
            data = torch.from_numpy(data)
            # add a leading dimension for batch_size
            data = data.unsqueeze(0)
            data = data.float().to("cuda:0").detach()
            output = model(data).data.cpu().numpy()
        top1 = output.argmax()
        if output[:, top1] > 3:
            label = action_class[top1]
        else:
            label = 'unknow'
        print("reslt:", output)

        res['render_image'] = render(image, res['joint_preds'], label,
                                     res['person_bbox'],
                                     detection_cfg.bbox_thre)
        # note: this displays the raw frame; the rendered result is in res['render_image']
        cv2.imshow('image', image)
        cv2.waitKey(10)
Example No. 9
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):

    model = call_obj(**model_cfg)
    edge = model.graph.edge
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    sys.path.append('{}/{}/build/python'.format(os.getcwd(), "openpose"))

    try:
        from openpose import pyopenpose as op
    except ImportError:
        print('Cannot find the OpenPose Python API.')
        return

    params = dict(model_folder='openpose/models', model_pose='COCO')
    params["hand"] = True

    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    # self.model.eval()
    # pose_tracker = naive_pose_tracker()
    #

    video_capture = cv2.VideoCapture(
        "mmskeleton/deprecated/st_gcn/resource/media/ta_chi.mp4")
    # video_capture = cv2.VideoCapture("fall01.mp4")
    pose_tracker = naive_pose_tracker()
    # start recognition
    start_time = time.time()
    frame_index = 0
    gt_labels = []
    with open('configs/recognition/st_gcn/xview/label.txt', 'r') as f:
        for line in f:
            gt_labels.append(line.strip('\n'))

    while True:

        tic = time.time()

        # get image
        ret, orig_image = video_capture.read()
        # orig_image = cv2.imread("3.jpg")
        if orig_image is None:
            break
        source_H, source_W, _ = orig_image.shape
        # orig_image = cv2.resize(
        #     orig_image, (256 * source_W // source_H, 256))
        H, W, _ = orig_image.shape

        # pose estimation

        datum = op.Datum()
        datum.cvInputData = orig_image
        opWrapper.emplaceAndPop([datum])
        body_ntu = dict()
        body_ntu_list = []
        left_hand = datum.handKeypoints[
            0]  # keypoints: (num_person, num_joint, 3)
        right_hand = datum.handKeypoints[1]
        # Map OpenPose keypoints onto the 25-joint NTU RGB+D skeleton. Note that
        # indices such as 8 (MidHip), 19 and 22 belong to OpenPose's BODY_25
        # layout, while model_pose is set to 'COCO' (18 keypoints) above.
        body_ntu["1"] = datum.poseKeypoints[0][8]
        body_ntu["2"] = np.array([
            datum.poseKeypoints[0][8][0],
            (datum.poseKeypoints[0][8][1] + datum.poseKeypoints[0][1][1]) / 2,
            datum.poseKeypoints[0][8][2]
        ])
        body_ntu["3"] = np.array([
            datum.poseKeypoints[0][0][0],
            (datum.poseKeypoints[0][0][1] + datum.poseKeypoints[0][1][1]) / 2,
            datum.poseKeypoints[0][0][2]
        ])
        body_ntu["4"] = datum.poseKeypoints[0][0]
        body_ntu["5"] = datum.poseKeypoints[0][5]
        body_ntu["6"] = datum.poseKeypoints[0][6]
        body_ntu["7"] = datum.poseKeypoints[0][7]
        body_ntu["8"] = left_hand[0][0]
        body_ntu["9"] = datum.poseKeypoints[0][2]
        body_ntu["10"] = datum.poseKeypoints[0][3]
        body_ntu["11"] = datum.poseKeypoints[0][4]
        body_ntu["12"] = right_hand[0][0]
        body_ntu["13"] = datum.poseKeypoints[0][12]
        body_ntu["14"] = datum.poseKeypoints[0][13]
        body_ntu["15"] = datum.poseKeypoints[0][14]
        body_ntu["16"] = datum.poseKeypoints[0][19]
        body_ntu["17"] = datum.poseKeypoints[0][9]
        body_ntu["18"] = datum.poseKeypoints[0][10]
        body_ntu["19"] = datum.poseKeypoints[0][11]
        body_ntu["20"] = datum.poseKeypoints[0][22]
        body_ntu["21"] = datum.poseKeypoints[0][1]
        body_ntu["22"] = left_hand[0][12]
        body_ntu["23"] = left_hand[0][4]
        body_ntu["24"] = right_hand[0][12]
        body_ntu["25"] = right_hand[0][4]
        for key in body_ntu:
            x, y, z = body_ntu[key]
            # cv2.putText(orig_image, key, (int(x), int(y)),
            #             cv2.FONT_HERSHEY_SIMPLEX, 5,
            #             (255, 255, 255))
            body_ntu_list.append([x, y, z])
        multi_pose = np.asarray([body_ntu_list])

        # print(np.floor(multi_pose))
        # cv2.imshow("OpenPose 1.5.1 - Tutorial Python API", fff)
        # cv2.waitKey(0)

        # orig_image = cv2.resize(orig_image, (768, 1024))
        # cv2.imshow("orig_image-GCN", orig_image)
        # cv2.waitKey(0)

        if len(multi_pose.shape) != 3:
            continue

        # normalization
        multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
        multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
        multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
        multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
        multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

        # pose tracking
        # if self.arg.video == 'camera_source':
        #     frame_index = int((time.time() - start_time) * self.arg.fps)
        # else:
        #     frame_index += 1
        frame_index += 1
        pose_tracker.update(multi_pose, frame_index)
        data_numpy = pose_tracker.get_skeleton_sequence()

        data = torch.from_numpy(data_numpy)

        data = data.unsqueeze(0)
        data = data.float().to("cuda:0").detach()
        with open("de.txt", 'w+') as f:
            for i in data[0][0]:
                f.write(str(i) + '\n\n')
        # break
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        voting_label = int(output.argmax(axis=1))

        print('voting_label_index:{}'.format(voting_label))
        print(gt_labels[voting_label])
        print(output[0][voting_label])
        app_fps = 1 / (time.time() - tic)
        image = render(edge, data_numpy, "fall_down",
                       [[gt_labels[voting_label]]], None, orig_image, app_fps)
        cv2.imshow("ST-GCN", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
def test(test_cfg,
         model_cfg,
         dataset_cfg,
         checkpoint,
         batch_size,
         work_dir,
         gpus=1,
         workers=4):

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    dataset = call_obj(**dataset_cfg,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           normalize,
                       ]))

    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size * gpus,
                                              shuffle=False,
                                              num_workers=workers * gpus)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)

    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()
    # prepare for evaluation
    num_samples = len(dataset)
    prog_bar = ProgressBar(num_samples // (batch_size * gpus) + 1)
    all_preds = np.zeros((num_samples, model_cfg.skeleton_head.num_joints, 3),
                         dtype=np.float32)

    all_boxes = np.zeros((num_samples, 6))
    filenames = []
    imgnums = []
    image_path = []
    idx = 0

    # copied from the HRNet evaluation loop
    with torch.no_grad():
        for i, (input, meta, target, target_weight) in enumerate(data_loader):
            # get prediction
            outputs = model.forward(input, return_loss=False)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs
            # flip test
            if test_cfg.flip:
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped, return_loss=False)
                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()
                # feature is not aligned, shift flipped heatmap for higher accuracy
                if test_cfg.shift_heatmap:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]
                output = (output + output_flipped) * 0.5

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            num_images = input.size(0)
            preds, maxvals = get_final_preds(test_cfg.post_process,
                                             output.detach().cpu().numpy(), c,
                                             s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images
            prog_bar.update()

        name_values, perf_indicator = dataset.evaluate(test_cfg, all_preds,
                                                       work_dir, all_boxes,
                                                       image_path, filenames,
                                                       imgnums)
    return perf_indicator
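# flip_back is not defined in this listing; it mirrors the flipped heatmaps back
# and swaps left/right joint channels so they can be averaged with the original
# output. A sketch of the HRNet-style helper (an assumption, not this repo's code):
def flip_back(output_flipped, matched_parts):
    """output_flipped: (batch, num_joints, height, width) heatmaps from a flipped input."""
    output_flipped = output_flipped[:, :, :, ::-1]  # undo the horizontal flip
    for left, right in matched_parts:               # swap symmetric joints
        tmp = output_flipped[:, left, :, :].copy()
        output_flipped[:, left, :, :] = output_flipped[:, right, :, :]
        output_flipped[:, right, :, :] = tmp
    return output_flipped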