Example #1
    def make_prediction(self):
        if self.with_cuda:
            self.model = self.model.cuda()
        with torch.no_grad():
            self.model.eval()
            for _, batch, batch_2d in self.test_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if self.with_cuda:
                    inputs_2d = inputs_2d.cuda()

            predicted_3d_pos = self.model(inputs_2d)

            if self.test_generator.augment_enabled():
                predicted_3d_pos[1, :, :, 0] *= -1
                predicted_3d_pos[1, :, self.joints_left +
                                 self.joints_right] = predicted_3d_pos[
                                     1, :,
                                     self.joints_right + self.joints_left]
                predicted_3d_pos = torch.mean(predicted_3d_pos,
                                              dim=0,
                                              keepdim=True)

            predicted_3d_pos = predicted_3d_pos.squeeze(0).cpu().numpy()
            rot = self.dataset.cameras()['detectron2'][0]['orientation']
            predicted_3d_pos = camera_to_world(predicted_3d_pos, R=rot, t=0)
            predicted_3d_pos[:, :, 2] -= np.min(predicted_3d_pos[:, :, 2])
            self.prediction = predicted_3d_pos
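Nearly every snippet in this collection ends with the same two post-processing steps: camera_to_world rotates the camera-space prediction into world coordinates, and the z-rebase shifts the skeleton so its lowest joint sits at z = 0. Below is a minimal NumPy sketch of the helpers involved, assuming the quaternion-based implementation of VideoPose3D's common/camera.py; the _sketch suffix marks these as illustrative stand-ins, not the library functions.

import numpy as np

def normalize_screen_coordinates_sketch(X, w, h):
    # Map pixel coordinates so that [0, w] becomes [-1, 1], preserving the aspect ratio.
    assert X.shape[-1] == 2
    return X / w * 2 - [1, h / w]

def camera_to_world_sketch(X, R, t):
    # Rotate camera-space points X (..., 3) by the unit quaternion R = (w, x, y, z)
    # and translate by t to obtain world-space coordinates.
    q = np.broadcast_to(np.asarray(R, dtype=X.dtype), (*X.shape[:-1], 4))
    qvec = q[..., 1:]
    uv = np.cross(qvec, X)
    uuv = np.cross(qvec, uv)
    return X + 2 * (q[..., :1] * uv + uuv) + t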
Example #2
def interface(model_pos, keypoints, W, H):
    # input (N, 17, 2) return (N, 17, 3)
    if not isinstance(keypoints, np.ndarray):
        keypoints = np.array(keypoints)

    from common.camera import normalize_screen_coordinates_new, camera_to_world, normalize_screen_coordinates
    #  keypoints = normalize_screen_coordinates_new(keypoints[..., :2], w=W, h=H)
    keypoints = normalize_screen_coordinates(keypoints[..., :2],
                                             w=1000,
                                             h=1002)
    input_keypoints = keypoints.copy()
    # test_time_augmentation True
    from common.generators import UnchunkedGenerator
    gen = UnchunkedGenerator(None,
                             None, [input_keypoints],
                             pad=common.pad,
                             causal_shift=common.causal_shift,
                             augment=True,
                             kps_left=common.kps_left,
                             kps_right=common.kps_right,
                             joints_left=common.joints_left,
                             joints_right=common.joints_right)
    prediction = evaluate(gen, model_pos, return_predictions=True)
    prediction = camera_to_world(prediction, R=common.rot, t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    return prediction
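A hypothetical call sketch for interface above (dummy data; numpy imported as np and a loaded model_pos are assumed to be provided by the surrounding project). Note that, as written, the function normalizes with a hard-coded 1000x1002 resolution rather than the W and H arguments.

# Hypothetical usage, assuming 100 frames of H36M-ordered 2D keypoints in pixel space.
kpts_2d = np.random.rand(100, 17, 2) * [1000, 1002]
pose_3d = interface(model_pos, kpts_2d, W=1000, H=1002)   # expected shape: (100, 17, 3)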
def gen_pose_frame(kpts, width, height, model_pos, pad, causal_shift=0):
    # kpts: (M, T, N, 2)
    norm_seqs = []
    for kpt in kpts:
        norm_kpt = normalize_screen_coordinates(kpt, w=width, h=height)
        norm_seqs.append(norm_kpt)

    gen = UnchunkedGenerator(None,
                             None,
                             norm_seqs,
                             pad=pad,
                             causal_shift=causal_shift,
                             augment=True,
                             kps_left=kps_left,
                             kps_right=kps_right,
                             joints_left=joints_left,
                             joints_right=joints_right)
    prediction = evaluate(gen, model_pos)

    prediction_to_world = []
    for i in range(len(prediction)):
        sub_prediction = prediction[i][0]
        sub_prediction = camera_to_world(sub_prediction, R=rot, t=0)
        sub_prediction[:, 2] -= np.amin(sub_prediction[:, 2])
        prediction_to_world.append(sub_prediction)

    return prediction_to_world
Example #4
def predict(img_path):
    # 1. Detect keypoints and display them
    # Pre-process the input image and detect people
    x, img = data.transforms.presets.yolo.load_test(img_path, short=256)
    # print("Shape of pre-processed image:", x.shape)

    start = time.time()

    # detect persons and bbox
    class_ids, scores, bounding_boxes = detector(x)

    # 2. Pre-process the detector outputs as input to alpha_pose
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_ids, scores, bounding_boxes)

    global detector_time
    detector_time += (time.time() - start)

    print("detector cost time: {:.3f} seconds".format(time.time() - start))
    prepare_end = time.time()

    # 3. Predict keypoints
    if pose_input is None:
        return None, None
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
    global predictor_2d_time
    predictor_2d_time += (time.time() - prepare_end)
    print("2d pose predictor cost time: {:.3f} seconds".format(time.time() - prepare_end))

    # 4. Display the 2D pose
    # utils.viz.plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxes, scores, box_thresh=0.5,
    #                          keypoint_thresh=0.2)

    # 5. Normalize coordinates
    prepare_end = time.time()
    kps = normalize_screen_coordinates(pred_coords.asnumpy(), w=img.shape[1], h=img.shape[0])

    receptive_field = pose3d_predictor.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    # 6. Create a generator as input to the 3D predictor
    generator = UnchunkedGenerator(None, None, [kps], pad=pad, causal_shift=causal_shift, augment=False)

    # 7. 3D pose estimation and display
    prediction = predict_3d_pos(generator, pose3d_predictor)
    global full_time, predictor_3d_time
    predictor_3d_time += time.time() - prepare_end
    full_time += time.time() - start
    print("3d predictor time: {:.3f} seconds".format(time.time() - prepare_end))

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    return prediction, img
def predict_3d_joints(predictor, coords_2d, w, h):
    # Normalize coordinates
    kps = normalize_screen_coordinates(coords_2d, w, h)
    # print('kps.type: {}, kps.shape: {}'.format(type(kps), kps.shape))

    # 2D keypoints generator
    receptive_field = predictor.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    # Create a generator as input to the 3D predictor
    generator = UnchunkedGenerator(None, None, [kps], pad=pad, causal_shift=causal_shift, augment=False)
    prediction = predict_3d_pos(generator, predictor)
    prediction = camera_to_world(prediction, R=rot, t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    return prediction
def gen_pose(kpts,
             valid_frames,
             width,
             height,
             model_pos,
             pad,
             causal_shift=0):
    assert len(kpts.shape) == 4, 'The shape of kpts: {}'.format(kpts.shape)
    assert kpts.shape[0] == len(valid_frames)

    norm_seqs = []
    for index, frames in enumerate(valid_frames):
        seq_kps = kpts[index, frames]
        norm_seq_kps = normalize_screen_coordinates(seq_kps, w=width, h=height)
        norm_seqs.append(norm_seq_kps)

    gen = UnchunkedGenerator(None,
                             None,
                             norm_seqs,
                             pad=pad,
                             causal_shift=causal_shift,
                             augment=True,
                             kps_left=kps_left,
                             kps_right=kps_right,
                             joints_left=joints_left,
                             joints_right=joints_right)
    prediction = evaluate(gen, model_pos)

    prediction_to_world = []
    for i in range(len(prediction)):
        sub_prediction = prediction[i]

        sub_prediction = camera_to_world(sub_prediction, R=rot, t=0)

        # sub_prediction[:, :, 2] -= np.expand_dims(np.amin(sub_prediction[:, :, 2], axis=1), axis=1).repeat([17], axis=1)
        # sub_prediction[:, :, 2] -= np.amin(sub_prediction[:, :, 2])

        prediction_to_world.append(sub_prediction)

    # prediction_to_world = np.asarray(prediction_to_world, dtype=np.float32)
    return prediction_to_world
def gen_pose_frame_(kpts, width, height, model_pos, pad, causal_shift=0):
    # input (N, 17, 2) return (N, 17, 3)
    if not isinstance(kpts, np.ndarray):
        kpts = np.array(kpts)

    keypoints = normalize_screen_coordinates(kpts[..., :2], w=width, h=height)

    input_keypoints = keypoints.copy()
    # test_time_augmentation True
    from common.generators import UnchunkedGenerator
    gen = UnchunkedGenerator(None,
                             None, [input_keypoints],
                             pad=pad,
                             causal_shift=causal_shift,
                             augment=True,
                             kps_left=kps_left,
                             kps_right=kps_right,
                             joints_left=joints_left,
                             joints_right=joints_right)
    prediction = evaluate(gen, model_pos)
    prediction = camera_to_world(prediction[0], R=rot, t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    return prediction
def video_pose(filepath,
               ckpt_dir,
               ckpt_name,
               filter_widths,
               show=False,
               channels=1024,
               save_file='output.mp4'):
    # Load the 3D pose estimator
    pose3d_predictor = get_pose3d_predictor(ckpt_dir,
                                            ckpt_name,
                                            filter_widths,
                                            channels=channels)

    receive_field = 1
    for i in filter_widths:
        receive_field *= i
    #     print(receive_field)
    half = receive_field // 2
    # Read the video
    cap = cv2.VideoCapture(filepath)
    # Set the capture resolution
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)

    # Frame rate and frame count
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print("Original FPS: {}, frame count: {}".format(fps, frame_count))
    # pause = int(1000 / fps)

    if show:
        # Display window width and height
        cv2.namedWindow('Video', 0)
        cv2.resizeWindow('Video', 960, 540)

    # Save the output video file
    print("Save the result to {}.".format(save_file))
    wh = (1280, 720)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_mp4 = cv2.VideoWriter(save_file, fourcc, fps, wh)

    coords_2d_list = []
    dicts = []
    i = 0
    # Because the data generator is created with pad=0, buffer the first receive_field//2 frames in advance
    elapsed_time = 0
    print("Preparing...")
    while i < half:
        ret_val, frame = cap.read()
        if ret_val != 1:
            print("Video is too short!")
            output_mp4.release()
            cap.release()
            cv2.destroyAllWindows()
            return
        # noinspection PyBroadException
        try:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        except:
            continue

        # Generate 2D keypoints
        current_frame = cap.get(cv2.CAP_PROP_POS_FRAMES)
        joints_dict = detect_2d_joints(frame, current_frame)
        dicts.append(joints_dict)
        img, predict_coords = joints_dict['img'], joints_dict['coords']
        normalized_coords = normalize_screen_coordinates(
            predict_coords.asnumpy()[0], w=img.shape[1], h=img.shape[0])
        coords_2d_list.append(normalized_coords)
        i += 1

    print("Starting to predict 3d pose...")
    fps_time = time.time()
    while True:
        # Grab a frame
        i += 1
        if i > receive_field and len(dicts) < 1:
            break
        ret_val, frame = cap.read()
        if ret_val == 1:
            try:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            except:
                continue
            # Generate 2D keypoints
            current_frame = cap.get(cv2.CAP_PROP_POS_FRAMES)
            joints_dict = detect_2d_joints(frame, current_frame)
            dicts.append(joints_dict)
            img, predict_coords = joints_dict['img'], joints_dict['coords']
            normalized_coords = normalize_screen_coordinates(
                predict_coords.asnumpy()[0], w=img.shape[1], h=img.shape[0])
            coords_2d_list.append(normalized_coords)

        joints_dict = dicts[0]
        if i > half + 1:
            # Drop the stale frame at the left end
            coords_2d_list = coords_2d_list[1:]
            dicts = dicts[1:]

        if len(coords_2d_list) < receive_field:
            if i < receive_field:
                # At the start of the video, when fewer than receive_field frames are available, pad on the left
                #                 print("kps_list length is {}, padding {} frames to left end.".format(len(kps_list), half))
                while len(coords_2d_list) < receive_field:
                    coords_2d_list.insert(0, coords_2d_list[0])
            elif len(coords_2d_list) > 0:
                # Near the end of the video, when fewer than receive_field frames remain, pad on the right
                #                 print("kps_list length is {}, padding 1 frames to right end.".format(len(kps_list)))
                coords_2d_list.append(coords_2d_list[-1])
            else:
                break
        # Build the 2D keypoint generator
        kps_2d = np.stack(coords_2d_list)
        generator = joints_2d_generator(kps_2d, pose3d_predictor)
        #         print(generator.num_frames())

        # Predict 3D keypoints
        predictions = predict_3d_pos(generator, pose3d_predictor)
        #         print('predictions.shape: ', predictions.shape)

        rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804],
                       dtype=np.float32)
        predictions = camera_to_world(predictions, R=rot, t=0)
        # We don't have the trajectory, but at least we can rebase the height
        predictions[:, :, 2] -= np.min(predictions[:, :, 2])

        coords_3d = predictions[0]

        #         print('predicted {} frame, elapsed time: {:.3f} seconds.'.format(predictions.shape[0], time.time() - fps_time))
        interval = time.time() - fps_time
        elapsed_time += interval
        fps = 1.0 / interval

        # Render the image
        result_image = render_image(coords_3d=coords_3d,
                                    skeleton=Skeleton(),
                                    **joints_dict)
        cv2.putText(result_image, "FPS: %.3f" % fps, (10, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        result_image = cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)

        if show:
            # Display in real time
            cv2.imshow('Video', result_image)
            if cv2.waitKey(1) & 0xff == ord('q'):
                break

        # resize and write
        to_write = cv2.resize(result_image, wh)
        output_mp4.write(to_write)
        fps_time = time.time()

    output_mp4.release()
    cap.release()
    cv2.destroyAllWindows()
    print("Average Fps: {:.3f}".format(frame_count / elapsed_time))
def reconstruction(args):
    """
    Generate 3D poses from the 2D keypoints detected in a video, and visualize them
        :param args.weight: the file name of the model weights
        :param args.keypoints_file: the file path of the 2D keypoints
        :param args.viz_output: the output path of the animation
        :param args.video_path: the input video path
        :param args.kpts_format: the format of the 2D keypoints (MSCOCO, MPII, H36M, OpenPose); the default is H36M
    """

    print('Loading 2D keypoints ...')
    keypoints, scores, _, _ = load_json(args.keypoints_file)

    # Loading only one person's keypoints
    if len(keypoints.shape) == 4:
        keypoints = keypoints[0]
    assert len(keypoints.shape) == 3

    # Transform the keypoints format from different dataset (MSCOCO, MPII) to h36m format
    if args.kpts_format == 'coco':
        keypoints, valid_frames = coco_h36m(keypoints)
    elif args.kpts_format == 'mpii':
        keypoints, valid_frames = mpii_h36m(keypoints)
    elif args.kpts_format == 'openpose':
        # Convert 'Openpose' format to MSCOCO
        order_coco = [i for i in range(17) if i != 1]
        keypoints = keypoints[:, order_coco]
        keypoints, valid_frames = coco_h36m(keypoints)
    else:
        valid_frames = np.where(
            np.sum(keypoints.reshape(-1, 34), axis=1) != 0)[0]
        assert args.kpts_format == 'h36m'

    # Get the width and height of video
    cap = cv2.VideoCapture(args.video_path)
    width = int(round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)))
    height = int(round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    # normalize keypoints
    input_keypoints = normalize_screen_coordinates(keypoints[..., :2],
                                                   w=width,
                                                   h=height)

    if args.frames == 27:
        filter_widths = [3, 3, 3]
        channels = 128
    elif args.frames == 81:
        filter_widths = [3, 3, 3, 3]
        channels = 64
    else:
        filter_widths = [3, 3, 3, 3, 3]
        channels = 32

    model_pos = SpatioTemporalModel(adj,
                                    17,
                                    2,
                                    17,
                                    filter_widths=filter_widths,
                                    channels=channels,
                                    dropout=0.05)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()

    # load trained model
    print('Loading checkpoint', args.weight)
    chk_file = os.path.join('./checkpoint', args.weight)
    checkpoint = torch.load(chk_file,
                            map_location=lambda storage, loc: storage)
    model_pos.load_state_dict(checkpoint['model_pos'])

    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    print('Reconstructing ...')
    gen = UnchunkedGenerator(None,
                             None, [input_keypoints[valid_frames]],
                             pad=pad,
                             causal_shift=causal_shift,
                             augment=True,
                             kps_left=kps_left,
                             kps_right=kps_right,
                             joints_left=joints_left,
                             joints_right=joints_right)
    prediction = evaluate(gen, model_pos, return_predictions=True)
    prediction = camera_to_world(prediction, R=rot, t=0)

    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])

    prediction_new = np.zeros((*input_keypoints.shape[:-1], 3),
                              dtype=np.float32)
    prediction_new[valid_frames] = prediction

    print('Rendering ...')
    anim_output = {'Reconstruction': prediction_new}
    render_animation(keypoints,
                     keypoints_metadata,
                     anim_output,
                     h36m_skeleton,
                     25,
                     3000,
                     np.array(70., dtype=np.float32),
                     args.viz_output,
                     limit=-1,
                     downsample=1,
                     size=5,
                     input_video_path=args.video_path,
                     viewport=(width, height),
                     input_video_skip=0)
Example #10
def main():
    dataset_path = "./data/data_3d_h36m.npz"    # Load the dataset
    from common.h36m_dataset import Human36mDataset
    dataset = Human36mDataset(dataset_path)
    dataset = read_3d_data(dataset)
    cudnn.benchmark = True
    device = torch.device("cpu")
    from models.sem_gcn import SemGCN
    from common.graph_utils import adj_mx_from_skeleton
    p_dropout = None
    adj = adj_mx_from_skeleton(dataset.skeleton())
    model_pos = SemGCN(adj, 128, num_layers=4, p_dropout=p_dropout,
                       nodes_group=dataset.skeleton().joints_group()).to(device)
    ckpt_path = "./checkpoint/pretrained/ckpt_semgcn_nonlocal_sh.pth.tar"
    ckpt = torch.load(ckpt_path, map_location='cpu')
    model_pos.load_state_dict(ckpt['state_dict'], False)
    model_pos.eval()
    # ============ Added code ==============
    # A single person's 2D keypoints produced by the project's 2D-processing code
    inputs_2d = [[483.0, 450], [503, 450], [503, 539], [496, 622], [469, 450], [462, 546], [469, 622], [483, 347],
                 [483, 326], [489, 264], [448, 347], [448, 408], [441, 463], [517, 347], [524, 408], [538, 463]]

    # # Detection result of the OpenPose test sample
    # inputs_2d = [[86.0, 137], [99, 128], [94, 127], [97, 110], [89, 105], [102, 129], [116, 116], [99, 110],
    #              [105, 93], [117, 69], [147, 63], [104, 93], [89, 69], [82, 38], [89, 139], [94, 140]]

    inputs_2d = np.array(inputs_2d)
    # inputs_2d[:, 1] = np.max(inputs_2d[:, 1]) - inputs_2d[:, 1]   # Flip into an upright pose; the raw data is upside down

    cam = dataset.cameras()['S1'][0]    # Get the camera parameters
    inputs_2d[..., :2] = normalize_screen_coordinates(inputs_2d[..., :2], w=cam['res_w'], h=cam['res_h'])  # Normalize the 2D coordinates

    # Plot the normalized 2D keypoints with their indices labeled
    print(inputs_2d)    # Print the normalized 2D keypoint coordinates
    d_x = inputs_2d[:, 0]
    d_y = inputs_2d[:, 1]
    plt.figure()
    plt.scatter(d_x, d_y)
    for i, txt in enumerate(np.arange(inputs_2d.shape[0])):
        plt.annotate(txt, (d_x[i], d_y[i]))     # Label each point with its index
    # plt.show()      # Show the normalized 2D keypoints

    # Get the 3D result
    inputs_2d = torch.tensor(inputs_2d, dtype=torch.float32)    # Convert to a tensor
    outputs_3d = model_pos(inputs_2d).cpu()         # Run the model
    outputs_3d[:, :, :] -= outputs_3d[:, :1, :]     # Remove global offset
    predictions = [outputs_3d.detach().numpy()]     # Prediction results
    prediction = np.concatenate(predictions)[0]     # Concatenate and take the first item
    # Invert camera transformation
    prediction = camera_to_world(prediction, R=cam['orientation'], t=0)     # The exact R and t matter little here; there are several variants depending on which camera parameters are used (some setups have no t, etc.)
    prediction[:, 2] -= np.min(prediction[:, 2])    # Shift up by min(prediction[:, 2]) so all coordinates become non-negative
    print('prediction')
    print(prediction)   # Print the 3D coordinates used for plotting
    plt.figure()
    ax = plt.subplot(111, projection='3d')  # Create a 3D plotting axes
    o_x = prediction[:, 0]
    o_y = prediction[:, 1]
    o_z = prediction[:, 2]
    print(o_x)
    print(o_y)
    print(o_z)
    ax.scatter(o_x, o_y, o_z)

    temp = o_x
    x = [temp[9], temp[8], temp[7], temp[10], temp[11], temp[12]]
    temp = o_y
    y = [temp[9], temp[8], temp[7], temp[10], temp[11], temp[12]]
    temp = o_z
    z = [temp[9], temp[8], temp[7], temp[10], temp[11], temp[12]]
    ax.plot(x, y, z)

    temp = o_x
    x = [temp[7], temp[0], temp[4], temp[5], temp[6]]
    temp = o_y
    y = [temp[7], temp[0], temp[4], temp[5], temp[6]]
    temp = o_z
    z = [temp[7], temp[0], temp[4], temp[5], temp[6]]
    ax.plot(x, y, z)

    temp = o_x
    x = [temp[0], temp[1], temp[2], temp[3]]
    temp = o_y
    y = [temp[0], temp[1], temp[2], temp[3]]
    temp = o_z
    z = [temp[0], temp[1], temp[2], temp[3]]
    ax.plot(x, y, z)

    temp = o_x
    x = [temp[7], temp[13], temp[14], temp[15]]
    temp = o_y
    y = [temp[7], temp[13], temp[14], temp[15]]
    temp = o_z
    z = [temp[7], temp[13], temp[14], temp[15]]
    ax.plot(x, y, z)

    # temp = o_x
    # x = [temp[0], temp[14]]
    # temp = o_y
    # y = [temp[0], temp[14]]
    # temp = o_z
    # z = [temp[0], temp[14]]
    # ax.plot(y, x, z)
    #
    # temp = o_x
    # x = [temp[0], temp[15]]
    # temp = o_y
    # y = [temp[0], temp[15]]
    # temp = o_z
    # z = [temp[0], temp[15]]
    # ax.plot(y, x, z)

    # Change the axis scaling so that the z axis spans twice as much as the other axes
    from matplotlib.pyplot import MultipleLocator
    major_locator = MultipleLocator(0.5)
    ax.xaxis.set_major_locator(major_locator)
    ax.yaxis.set_major_locator(major_locator)
    ax.zaxis.set_major_locator(major_locator)
    ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax), np.diag([0.5, 0.5, 1, 1]))

    plt.show()
Example #11
                    action_list.append(action)
            print(action_list)
            print(len(action_list))
            for action in action_list:
                position_list = []
                for idx, camera_id in enumerate(id_order):
                    f = args.from_source + '/' + subject + '/MyPoseFeatures/D3_Positions_mono/' + action + '.' + camera_id + '.cdf.mat'
                    # if subject == 'S11' and action == 'Directions':
                    #     continue # Discard corrupted video

                    # Use consistent naming convention

                    hf = loadmat(f)
                    positions = hf['data'][0, 0].reshape(-1, 32, 3)
                    positions /= 1000 # Meters instead of millimeters
                    positions_universal = camera_to_world(positions,R=np.array(camera_info[idx]['orientation']),t=np.array(camera_info[idx]['translation'])/1000)
                    position_list.append(positions_universal.astype('float32'))
                canonical_name = action.replace('TakingPhoto', 'Photo') \
                                       .replace('WalkingDog', 'WalkDog')
                # if action == 'Directions 1':
                #     print('checking...')
                #     print(position_list[0]-position_list[1])
                #     print(position_list[1]-position_list[2])
                #     print(position_list[2]-position_list[3])
                output[subject][canonical_name] = sum(position_list)/4
                if action == 'Directions 1':
                    print(output[subject][canonical_name])
                    print(output[subject][canonical_name]-position_list[0])
        
        print('Saving...')
        #np.savez_compressed(output_filename, positions_3d=output)
Example #12
def draw_3Dimg_c3dpo(pos, image, display=None, kpt2D=None, shape=None):
    from mpl_toolkits.mplot3d import Axes3D  # required for the 3D projection
    from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
    fig = plt.figure(figsize=(12, 6))
    canvas = FigureCanvas(fig)

    # 2D
    fig.add_subplot(131)
    if isinstance(kpt2D, np.ndarray):
        plt.imshow(draw_2Dimg(image, kpt2D))
    else:
        plt.imshow(image)

    # c3dpo correction
    # if shape is not None:
    #     index_list = [0, 5, 7, 9, 6, 8, 10, 11, 13, 15, 12, 14, 16]
    #     pos[index_list] = shape
    # 3D
    ax = fig.add_subplot(132, projection='3d')
    radius = 1.7
    ax.view_init(elev=15., azim=70.)
    ax.set_xlim3d([-radius / 2, radius / 2])
    ax.set_zlim3d([0, radius])
    ax.set_ylim3d([-radius / 2, radius / 2])
    ax.set_aspect('equal')
    # Axis tick labels
    # ax.set_xticklabels([])
    # ax.set_yticklabels([])
    # ax.set_zticklabels([])
    # ax.dist = 7.5
    parents = common.skeleton_parents
    joints_right = common.joints_right

    for j, j_parent in enumerate(parents):
        if j_parent == -1:
            continue

        col = 'red' if j in joints_right else 'black'
        # Draw the 3D skeleton
        ax.plot([pos[j, 0], pos[j_parent, 0]], [pos[j, 1], pos[j_parent, 1]],
                [pos[j, 2], pos[j_parent, 2]],
                zdir='z',
                c=col)

    # c3dpo
    bx = fig.add_subplot(133, projection='3d')
    bx.view_init(elev=15., azim=15.)
    bx.set_xlim3d([-radius / 2, radius / 2])
    bx.set_zlim3d([0, radius])
    bx.set_ylim3d([-radius / 2, radius / 2])
    bx.set_aspect('equal')
    from common.camera import camera_to_world
    shape = camera_to_world(shape, R=common.c3dpo_rot, t=0)
    shape[:, 2] -= np.min(shape[:, 2])
    order_pair = ((1, 2), (2, 3), (4, 5), (5, 6), (0, 1), (0, 4), (0, 7),
                  (7, 8), (8, 9), (11, 12), (12, 13), (14, 15), (15, 16),
                  (11, 8), (14, 8))
    # scale
    # shape *= 0.7
    for j, j_parent in order_pair:

        bx.plot([shape[j, 0], shape[j_parent, 0]],
                [shape[j, 1], shape[j_parent, 1]],
                [shape[j, 2], shape[j_parent, 2]],
                zdir='z',
                c='red')
    # bx.scatter(shape[:, 0], shape[:, 1], shape[:, 2])

    width, height = fig.get_size_inches() * fig.get_dpi()
    canvas.draw()  # draw the canvas, cache the renderer
    image = np.fromstring(canvas.tostring_rgb(),
                          dtype='uint8').reshape(int(height), int(width), 3)
    if display:
        cv2.imshow('im', image)
        cv2.waitKey(1)

    return image
Example #13
def predict(img_path):
    # 1. Pre-process the input image and detect people
    x, img = data.transforms.presets.yolo.load_test(img_path, short=256)
    # detector.summary(x)
    # print("x.shape:", x.shape)

    start = time.time()

    # detect persons and bbox,
    class_ids, scores, bounding_boxes = detector(x)  # shape: [sample_idx, class_idx, instance]
    # print("bounding_boxes.shape", bounding_boxes.shape, "bounding_boxes[0, 0]:", bounding_boxes[0, 0])

    # 2. Pre-process the detector outputs as input to mobile_pose
    pose_input, upscale_bbox = detector_to_mobile_pose(img, class_ids, scores, bounding_boxes)
    print("detector cost time: {:.3f} seconds".format(time.time() - start))
    global detector_time
    detector_time += (time.time() - start)

    if pose_input is None:
        return None, None
    # 4. Predict 2D keypoints
    # pose_net.summary(pose_input)
    start_time = time.time()
    predicted_heatmap = pose_net(pose_input)
    pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
    # print("type(pre_coords): {}, shape(pre_coords): {}".format(type(pred_coords), pred_coords.shape))
    # print("pred_coords: {}".format(pred_coords))
    global predictor_2d_time
    predictor_2d_time += (time.time() - start_time)
    print("2d pose predictor cost time: {:.3f} seconds".format(time.time() - start_time))

    # 5. Display the 2D pose
    # ax = utils.viz.plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxes, scores, box_thresh=0.5,
    #                               keypoint_thresh=0.2)
    # print(pred_coords)
    # 6. Normalize coordinates
    start_time = time.time()
    kps = normalize_screen_coordinates(pred_coords.asnumpy(), w=img.shape[1], h=img.shape[0])
    # print('kps.type: {}, kps.shape: {}'.format(type(kps), kps.shape))

    # 7. 2D keypoints generator
    receptive_field = pose3d_predictor.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    # Create a generator as input to the 3D predictor
    generator = UnchunkedGenerator(None, None, [kps], pad=pad, causal_shift=causal_shift, augment=False)

    # 8. 3D pose estimation and display
    prediction = predict_3d_pos(generator, pose3d_predictor)
    global predictor_3d_time, full_time
    predictor_3d_time += (time.time() - start_time)
    full_time += (time.time() - start)
    print("3d pose predictor cost time: {:.3f} seconds".format(time.time() - start_time))
    # print("prediction.shape: ", prediction.shape)

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)

    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    elapsed = time.time() - start
    print("Total elapsed time of predicting image file {}: {:.3f} seconds".format(img_path, elapsed))
    return prediction, img
Example #14
def videpose_infer(args):
    from common.camera import normalize_screen_coordinates, camera_to_world, image_coordinates
    from common.generators import UnchunkedGenerator
    from common.model import TemporalModel
    from common.utils import Timer, evaluate, add_path
    from videopose import get_detector_2d, ckpt_time, metadata, time0

    import gene_npz

    gene_npz.args.outputpath = str(args.viz_output / "alpha_pose_kunkun_cut")
    print(gene_npz.args)
    # detector_2d = get_detector_2d(args.detector_2d)
    detector_2d = gene_npz.generate_kpts(args.detector_2d)

    assert detector_2d, 'detector_2d should be in ({alpha, hr, open}_pose)'

    # 2D kpts loads or generate
    if not args.input_npz:
        video_name = args.viz_video
        keypoints = detector_2d(video_name)
    else:
        npz = np.load(args.input_npz)
        keypoints = npz['kpts']  # (N, 17, 2)

    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(
        keypoints_symmetry[0]), list(keypoints_symmetry[1])
    joints_left, joints_right = list(
        [4, 5, 6, 11, 12, 13]), list([1, 2, 3, 14, 15, 16])

    # Normalize keypoints, assuming these camera parameters
    keypoints = normalize_screen_coordinates(
        keypoints[..., :2], w=1000, h=1002)

    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3], causal=args.causal, dropout=args.dropout, channels=args.channels,
                              dense=args.dense)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()

    ckpt, time1 = ckpt_time(time0)
    print('-------------- load data spends {:.2f} seconds'.format(ckpt))

    # load trained model
    chk_filename = os.path.join(
        args.checkpoint, args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(
        chk_filename, map_location=lambda storage, loc: storage)  # map loc onto storage
    model_pos.load_state_dict(checkpoint['model_pos'])

    ckpt, time2 = ckpt_time(time1)
    print('-------------- load 3D model spends {:.2f} seconds'.format(ckpt))

    #  Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    print('Rendering...')
    input_keypoints = keypoints.copy()
    gen = UnchunkedGenerator(None, None, [input_keypoints],
                             pad=pad, causal_shift=causal_shift, augment=args.test_time_augmentation,
                             kps_left=kps_left, kps_right=kps_right, joints_left=joints_left, joints_right=joints_right)
    prediction = evaluate(gen, model_pos, return_predictions=True)

    # save 3D joint points
    np.save(args.viz_output / "test_3d_output.npy",
            prediction, allow_pickle=True)

    rot = np.array([0.14070565, -0.15007018, -0.7552408,
                   0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)

    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    anim_output = {'Reconstruction': prediction}
    input_keypoints = image_coordinates(
        input_keypoints[..., :2], w=1000, h=1002)

    ckpt, time3 = ckpt_time(time2)
    print(
        '-------------- generate reconstruction 3D data spends {:.2f} seconds'.format(ckpt))

    ckpt, time4 = ckpt_time(time3)
    print('total spends {:.2f} seconds'.format(ckpt))
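image_coordinates above undoes the earlier normalization so the 2D keypoints can be drawn back onto the video frames. A minimal sketch of that inverse mapping, assuming the implementation in VideoPose3D's common/camera.py (the _sketch suffix marks it as an illustrative stand-in):

import numpy as np

def image_coordinates_sketch(X, w, h):
    # Inverse of normalize_screen_coordinates: map x from [-1, 1] back to [0, w] pixels.
    assert X.shape[-1] == 2
    return (X + [1, h / w]) * w / 2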
Example #15
def main(args):
    print('==> Using settings {}'.format(args))

    convm = torch.zeros(3, 17, 17, dtype=torch.float)

    print('==> Loading dataset...')
    dataset_path = path.join('data', 'data_3d_' + args.dataset + '.npz')
    if args.dataset == 'h36m':
        from common.h36m_dataset import Human36mDataset
        dataset = Human36mDataset(dataset_path)
    else:
        raise KeyError('Invalid dataset')

    print('==> Preparing data...')
    dataset = read_3d_data(dataset)

    print('==> Loading 2D detections...')
    keypoints = create_2d_data(
        path.join('data',
                  'data_2d_' + args.dataset + '_' + args.keypoints + '.npz'),
        dataset)

    cudnn.benchmark = True
    device = torch.device("cuda")

    # Create model
    print("==> Creating model...")

    if args.architecture == 'linear':
        from models.linear_model import LinearModel, init_weights
        num_joints = dataset.skeleton().num_joints()
        model_pos = LinearModel(num_joints * 2,
                                (num_joints - 1) * 3).to(device)
        model_pos.apply(init_weights)
    elif args.architecture == 'gcn':
        from models.sem_gcn import SemGCN
        from common.graph_utils import adj_mx_from_skeleton
        p_dropout = (None if args.dropout == 0.0 else args.dropout)
        adj = adj_mx_from_skeleton(dataset.skeleton())
        model_pos = SemGCN(convm,
                           adj,
                           args.hid_dim,
                           num_layers=args.num_layers,
                           p_dropout=p_dropout,
                           nodes_group=dataset.skeleton().joints_group()
                           if args.non_local else None).to(device)
    else:
        raise KeyError('Invalid model architecture')

    print("==> Total parameters: {:.2f}M".format(
        sum(p.numel() for p in model_pos.parameters()) / 1000000.0))

    # Resume from a checkpoint
    ckpt_path = args.evaluate

    if path.isfile(ckpt_path):
        print("==> Loading checkpoint '{}'".format(ckpt_path))
        ckpt = torch.load(ckpt_path)
        start_epoch = ckpt['epoch']
        error_best = ckpt['error']
        model_pos.load_state_dict(ckpt['state_dict'])
        print("==> Loaded checkpoint (Epoch: {} | Error: {})".format(
            start_epoch, error_best))
    else:
        raise RuntimeError("==> No checkpoint found at '{}'".format(ckpt_path))

    print('==> Rendering...')

    poses_2d = keypoints[args.viz_subject][args.viz_action]
    out_poses_2d = poses_2d[args.viz_camera]
    out_actions = [args.viz_camera] * out_poses_2d.shape[0]

    poses_3d = dataset[args.viz_subject][args.viz_action]['positions_3d']
    assert len(poses_3d) == len(poses_2d), 'Camera count mismatch'
    out_poses_3d = poses_3d[args.viz_camera]

    ground_truth = dataset[args.viz_subject][args.viz_action]['positions_3d'][
        args.viz_camera].copy()

    input_keypoints = out_poses_2d.copy()
    render_loader = DataLoader(PoseGenerator([out_poses_3d], [out_poses_2d],
                                             [out_actions]),
                               batch_size=args.batch_size,
                               shuffle=False,
                               num_workers=args.num_workers,
                               pin_memory=True)

    prediction = evaluate(render_loader, model_pos, device,
                          args.architecture)[0]

    # Invert camera transformation
    cam = dataset.cameras()[args.viz_subject][args.viz_camera]
    prediction = camera_to_world(prediction, R=cam['orientation'], t=0)
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    ground_truth = camera_to_world(ground_truth, R=cam['orientation'], t=0)
    ground_truth[:, :, 2] -= np.min(ground_truth[:, :, 2])

    anim_output = {'Regression': prediction, 'Ground truth': ground_truth}
    input_keypoints = image_coordinates(input_keypoints[..., :2],
                                        w=cam['res_w'],
                                        h=cam['res_h'])
    render_animation(input_keypoints,
                     anim_output,
                     dataset.skeleton(),
                     dataset.fps(),
                     args.viz_bitrate,
                     cam['azimuth'],
                     args.viz_output,
                     limit=args.viz_limit,
                     downsample=args.viz_downsample,
                     size=args.viz_size,
                     input_video_path=args.viz_video,
                     viewport=(cam['res_w'], cam['res_h']),
                     input_video_skip=args.viz_skip)
Example #16
def analyze_frame(h, frame):

    boxes, keypoints = infer.inference_on_frame(h['predictor'], frame)

    # step 4: prepare data.
    # We only need the 2D keypoints here.
    # The first element is an empty array; the second is our actual frame data, a 3D numpy array
    # whose first dimension is 1 and whose remaining dimensions hold the 17 joints of 3 doubles each.
    kp = keypoints[1][0][:2, :].T  # extract (x, y) just like in prepare_data_2d_custom code

    # what to do if kp is NaN or missing data or something?
    # I guess just ignore it

    # The original code does this at the end of step 4, but we keep it simple and take the data from step 2 directly into a variable.
    #     output[canonical_name]['custom'] = [data[0]['keypoints'].astype('float32')]
    #output_custom_canonical_bullshit = kp.astype('float32')

    # This is what happens at the end of step 4; the resulting file is loaded at the beginning of step 5.
    #     np.savez_compressed(os.path.join(args.dataoutputdir, output_prefix_2d + args.output), positions_2d=output, metadata=metadata)

    # This is what the original script does here.
    # Confusingly, keypoints is actually just raw data until it is set to keypoints['positions_2d'].
    # keypoints = np.load('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz', allow_pickle=True)

    # step 5: everything else
    # starting to copy stuff over from run.py

    # extract dataset from the init dictionary
    dataset = h['dataset']
    keypoints_metadata = h['keypoints_metadata']
    keypoints_symmetry = h['keypoints_symmetry']

    kps_left = h['kps_left']
    kps_right = h['kps_right']
    joints_left = h['joints_left']
    joints_right = h['joints_right']

    # normalize
    for i in range(len(kp)):
        koord = kp[i]
        kp[i] = normalize_screen_coordinates(koord, h['frame_metadata']['w'], h['frame_metadata']['h'])
    #for kps in enumerate(keypoints):
    #    kps[..., :2] = normalize_screen_coordinates(kps[..., :2], frame_metadata['w'], frame_metadata['h'])

    # this is taken from the args.architecture and run.py and just hardcoded, skipping a lot of nonsense
    filter_widths = [int(x) for x in "3,3,3,3,3".split(',')]
    skeleton_num_joints = dataset.skeleton().num_joints()
    #skeleton_num_joints = 17

    causal = True
    dropout = 0.25
    channels = 1024
    dense = False

    model_pos_train = TemporalModelOptimized1f(kp.shape[-2], kp.shape[-1], skeleton_num_joints,
                                               filter_widths=filter_widths, causal=causal, dropout=dropout,
                                               channels=channels)
    model_pos = TemporalModel(kp.shape[-2], kp.shape[-1], skeleton_num_joints,
                                         filter_widths=filter_widths, causal=causal, dropout=dropout,
                                         channels=channels, dense=dense)

    receptive_field = model_pos.receptive_field()
    print('INFO: Receptive field: {} frames'.format(receptive_field))
    pad = (receptive_field - 1) // 2  # Padding on each side
    #if args.causal:
    #    print('INFO: Using causal convolutions')
    #    causal_shift = pad
    #else:
    #    causal_shift = 0
    causal_shift = pad

    model_params = 0
    for parameter in model_pos.parameters():
        model_params += parameter.numel()
    print('INFO: Trainable parameter count:', model_params)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()
        model_pos_train = model_pos_train.cuda()

    #if args.resume or args.evaluate:
    if True:
        chk_filename = "checkpoint/pretrained_h36m_detectron_coco.bin"
        print('Loading checkpoint', chk_filename)
        checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
        print('This model was trained for {} epochs'.format(checkpoint['epoch']))
        model_pos_train.load_state_dict(checkpoint['model_pos'])
        model_pos.load_state_dict(checkpoint['model_pos'])

        # False in our particular case. We might benefit from getting rid of model_traj,
        # unless it is very fast, in which case we should keep it in case we ever upgrade.
        if 'model_traj' in checkpoint:
            # Load trajectory model if it contained in the checkpoint (e.g. for inference in the wild)
            model_traj = TemporalModel(kp.shape[-2], kp.shape[-1], 1,
                                filter_widths=filter_widths, causal=causal, dropout=dropout, channels=channels,
                                dense=dense)
            if torch.cuda.is_available():
                model_traj = model_traj.cuda()
            model_traj.load_state_dict(checkpoint['model_traj'])
        else:
            model_traj = None

    test_generator = UnchunkedGenerator(None, None, kp,
                                        pad=pad, causal_shift=causal_shift, augment=False,
                                        kps_left=kps_left, kps_right=kps_right,
                                        joints_left=joints_left, joints_right=joints_right)
    print('INFO: Testing on {} frames'.format(test_generator.num_frames()))

    # Evaluate
    def evaluate(eval_generator, action=None, return_predictions=False, use_trajectory_model=False):
        epoch_loss_3d_pos = 0
        epoch_loss_3d_pos_procrustes = 0
        epoch_loss_3d_pos_scale = 0
        epoch_loss_3d_vel = 0
        with torch.no_grad():
            if not use_trajectory_model:
                model_pos.eval()
            else:
                model_traj.eval()
            N = 0
            for _, batch, batch_2d in eval_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if torch.cuda.is_available():
                    inputs_2d = inputs_2d.cuda()

                # Positional model
                if not use_trajectory_model:
                    predicted_3d_pos = model_pos(inputs_2d)
                else:
                    predicted_3d_pos = model_traj(inputs_2d)

                # Test-time augmentation (if enabled)
                if eval_generator.augment_enabled():
                    # Undo flipping and take average with non-flipped version
                    predicted_3d_pos[1, :, :, 0] *= -1
                    if not use_trajectory_model:
                        predicted_3d_pos[1, :, joints_left + joints_right] = predicted_3d_pos[1, :, joints_right + joints_left]
                    predicted_3d_pos = torch.mean(predicted_3d_pos, dim=0, keepdim=True)

                if return_predictions:
                    return predicted_3d_pos.squeeze(0).cpu().numpy()

                inputs_3d = torch.from_numpy(batch.astype('float32'))
                if torch.cuda.is_available():
                    inputs_3d = inputs_3d.cuda()
                inputs_3d[:, :, 0] = 0
                if eval_generator.augment_enabled():
                    inputs_3d = inputs_3d[:1]

                error = mpjpe(predicted_3d_pos, inputs_3d)
                epoch_loss_3d_pos_scale += inputs_3d.shape[0]*inputs_3d.shape[1] * n_mpjpe(predicted_3d_pos, inputs_3d).item()

                epoch_loss_3d_pos += inputs_3d.shape[0]*inputs_3d.shape[1] * error.item()
                N += inputs_3d.shape[0] * inputs_3d.shape[1]

                inputs = inputs_3d.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])
                predicted_3d_pos = predicted_3d_pos.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])

                epoch_loss_3d_pos_procrustes += inputs_3d.shape[0]*inputs_3d.shape[1] * p_mpjpe(predicted_3d_pos, inputs)

                # Compute velocity error
                epoch_loss_3d_vel += inputs_3d.shape[0]*inputs_3d.shape[1] * mean_velocity_error(predicted_3d_pos, inputs)

        if action is None:
            print('----------')
        else:
            print('----'+action+'----')
        e1 = (epoch_loss_3d_pos / N)*1000
        e2 = (epoch_loss_3d_pos_procrustes / N)*1000
        e3 = (epoch_loss_3d_pos_scale / N)*1000
        ev = (epoch_loss_3d_vel / N)*1000
        print('Test time augmentation:', eval_generator.augment_enabled())
        print('Protocol #1 Error (MPJPE):', e1, 'mm')
        print('Protocol #2 Error (P-MPJPE):', e2, 'mm')
        print('Protocol #3 Error (N-MPJPE):', e3, 'mm')
        print('Velocity Error (MPJVE):', ev, 'mm')
        print('----------')

        return e1, e2, e3, ev

    image_keypoints2d = kp
    gen = UnchunkedGenerator(None, None, [[image_keypoints2d]],
                             pad=pad, causal_shift=causal_shift, augment=False,
                             kps_left=kps_left, kps_right=kps_right, joints_left=joints_left, joints_right=joints_right)
    prediction = evaluate(gen, return_predictions=True)

    # here is the data format
    # public enum VideoPose3dJointOrder
    # {
    #     HIP = 0,
    #     R_HIP = 1,
    #     R_KNEE = 2,
    #     R_FOOT = 3,
    #     L_HIP = 4,
    #     L_KNEE = 5,
    #     L_FOOT = 6,
    #     SPINE = 7,
    #     THORAX = 8,
    #     NOSE = 9,
    #     HEAD = 10,
    #     L_SHOULDER = 11,
    #     L_ELBOW = 12,
    #     L_WRIST = 13,
    #     R_SHOULDER = 14,
    #     R_ELBOW = 15,
    #     R_WRIST = 16
    # }

    # This bugs out; it is unclear what the original code intended.
    # We can work around it by getting the width/height some other way.

    # Invert camera transformation
    cam = dataset.cameras()

    width = cam['frame'][0]['res_w']
    height = cam['frame'][0]['res_h']

    image_keypoints2d = image_coordinates(image_keypoints2d[..., :2], w=width, h=height)

    viz_camera = 0

    # If the ground truth is not available, take the camera extrinsic params from a random subject.
    # They are almost the same, and anyway, we only need this for visualization purposes.
    for subject in dataset.cameras():
        if 'orientation' in dataset.cameras()[subject][viz_camera]:
            rot = dataset.cameras()[subject][viz_camera]['orientation']
            break
    prediction = camera_to_world(prediction, R=rot, t=0)
    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])

    # because algo was meant for a list of frames, we take the first frame (our only frame)
    prediction3d = prediction[0]

    return prediction3d, image_keypoints2d

    # do we want to visualize? this code used to write to json and create a video for visualization
    #if args.viz_output is not None:
    if True:

        anim_output = {'Reconstruction': prediction}

        # format the data in the same format as mediapipe, so we can load it in unity with the same script
        # we need a list (frames) of lists of 3d landmarks.
        unity_landmarks = prediction.tolist()

        # how to send data? or display it?
        # maybe draw it on the webcam feed....?!?!?!


        #with open(args.output_json, "w") as json_file:
        #    json.dump(unity_landmarks, json_file)

        #if args.rendervideo == "yes":
        #    from common.visualization import render_animation
        #    render_animation(input_keypoints, keypoints_metadata, anim_output,
        #                     dataset.skeleton(), dataset.fps(), args.viz_bitrate, cam['azimuth'], args.viz_output,
        #                     limit=args.viz_limit, downsample=args.viz_downsample, size=args.viz_size,
        #                     input_video_path=args.viz_video, viewport=(cam['res_w'], cam['res_h']),
        #                     input_video_skip=args.viz_skip)

    we_re_done_here = 1
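The evaluate closure above reports several error metrics. As a reference point, here is a minimal sketch of the basic MPJPE metric (Protocol #1), assuming the definition in VideoPose3D's common/loss.py; p_mpjpe and n_mpjpe add Procrustes alignment and scale normalization on top of it.

import torch

def mpjpe_sketch(predicted, target):
    # Mean per-joint position error: mean Euclidean distance over all joints and frames.
    assert predicted.shape == target.shape
    return torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1))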
Example #17
        # Predictions are in camera space
        np.save(args.viz_export, prediction)

    if args.viz_output is not None:
        # Invert camera transformation
        cam = dataset.cameras()[args.viz_subject][args.viz_camera]

        # If the ground truth is not available, take the camera extrinsic params from a random subject.
        # They are almost the same, and anyway, we only need this for visualization purposes.
        rot = None
        for subject in dataset.cameras():
            if 'orientation' in dataset.cameras()[subject][args.viz_camera]:
                rot = dataset.cameras()[subject][
                    args.viz_camera]['orientation']
                break
        prediction = camera_to_world(prediction, R=rot, t=0)
        # We don't have the trajectory, but at least we can rebase the height
        prediction[:, :, 2] -= np.min(prediction[:, :, 2])

        anim_output = {'Reconstruction': prediction}

        input_keypoints = image_coordinates(input_keypoints[..., :2],
                                            w=cam['res_w'],
                                            h=cam['res_h'])
        # print('w, h:', cam['res_w'], cam['res_h'])
        from common.visualization import render_animation

        print("rot:", rot)
        print("cam['azimuth']:", cam['azimuth'])
        render_animation(input_keypoints,
                         keypoints_metadata,
Example #18
def draw_3Dimg_adjust(pos, image, display=None, kpt2D=None, shapes=None):
    from mpl_toolkits.mplot3d import Axes3D  # required for the 3D projection
    from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
    fig = plt.figure(figsize=(12, 6))
    canvas = FigureCanvas(fig)

    # 2D
    fig.add_subplot(131)
    if isinstance(kpt2D, np.ndarray):
        plt.imshow(draw_2Dimg(image, kpt2D))
    else:
        plt.imshow(image)

    # nrsfm correction
    # if shapes is not None:
    #     index_list = [0, 5, 7, 9, 6, 8, 10, 11, 13, 15, 12, 14, 16]
    #     pos[index_list] = shapes[-1]
    # 3D
    ax = fig.add_subplot(132, projection='3d')
    radius = 1.7
    ax.view_init(elev=15., azim=70.)
    ax.set_xlim3d([-radius / 2, radius / 2])
    ax.set_zlim3d([0, radius])
    ax.set_ylim3d([-radius / 2, radius / 2])
    ax.set_aspect('equal')
    # Axis tick labels
    # ax.set_xticklabels([])
    # ax.set_yticklabels([])
    # ax.set_zticklabels([])
    # ax.dist = 7.5
    parents = common.skeleton_parents
    joints_right = common.joints_right

    for j, j_parent in enumerate(parents):
        if j_parent == -1:
            continue

        col = 'red' if j in joints_right else 'black'
        # Draw the 3D skeleton
        ax.plot([pos[j, 0], pos[j_parent, 0]], [pos[j, 1], pos[j_parent, 1]],
                [pos[j, 2], pos[j_parent, 2]],
                zdir='z',
                c=col)

    # nrsfm
    bx = fig.add_subplot(133, projection='3d')
    bx.view_init(elev=15., azim=70.)
    bx.set_xlim3d([-radius / 2, radius / 2])
    bx.set_zlim3d([0, radius])
    bx.set_ylim3d([-radius / 2, radius / 2])
    bx.set_aspect('equal')
    # to_world
    from common.camera import camera_to_world
    shapes = np.array(shapes)
    shapes = shapes[:, :, [0, 2, 1]]
    shapes = camera_to_world(shapes, R=common.rot, t=0)
    shapes[:, :, 2] -= np.min(shapes[:, :, 2])
    one_shape = shapes[-1]
    # one_shape = one_shape[:, [0, 2, 1]]

    for j, j_parent in common.my_connections:
        bx.plot([one_shape[j, 0], one_shape[j_parent, 0]],
                [one_shape[j, 1], one_shape[j_parent, 1]],
                [one_shape[j, 2], one_shape[j_parent, 2]],
                zdir='z',
                c='black')

    width, height = fig.get_size_inches() * fig.get_dpi()
    canvas.draw()  # draw the canvas, cache the renderer
    image = np.fromstring(canvas.tostring_rgb(),
                          dtype='uint8').reshape(int(height), int(width), 3)
    if display:
        cv2.imshow('im', image)
        cv2.waitKey(1)

    return image
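Both draw_3Dimg_c3dpo and draw_3Dimg_adjust convert the rendered canvas to a NumPy image via np.fromstring, which is deprecated for binary input in recent NumPy releases. A drop-in sketch using np.frombuffer instead (same bytes, same reshape), assuming an Agg-based canvas such as FigureCanvasQTAgg:

import numpy as np

def canvas_to_image(canvas, fig):
    # Render the Agg canvas and view its RGB buffer as an (H, W, 3) uint8 array.
    canvas.draw()
    width, height = fig.get_size_inches() * fig.get_dpi()
    return np.frombuffer(canvas.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)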