def init_model(self):
    self.model = TemporalModel(self.valid_poses[0].shape[-2], self.valid_poses[0].shape[-1],
                               self.dataset.skeleton().num_joints(),
                               filter_widths=[3, 3, 3, 3, 3], causal=False,
                               dropout=0.25, channels=1024, dense=False)
    self.model.load_state_dict(self.checkpoint['model_pos'])
def videopose_model_load():
    # load trained model
    from common.model import TemporalModel
    chk_filename = main_path + '/../checkpoint/cpn-pt-243.bin'
    checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3], causal=False,
                              dropout=False, channels=1024, dense=False)
    # bypass CUDA for now to run only on CPU
    # model_pos = model_pos.cuda()
    model_pos.load_state_dict(checkpoint['model_pos'])

    # Print model's state_dict
    print("Model's state_dict:")
    for param_tensor in model_pos.state_dict():
        print(param_tensor, "\t", model_pos.state_dict()[param_tensor].size())

    receptive_field = model_pos.receptive_field()
    return model_pos
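As a side note, the receptive field reported by this model should be 243 frames for filter widths [3, 3, 3, 3, 3], which is what the "243" in the checkpoint name refers to. A quick sanity-check sketch, assuming only that the receptive field is the product of the filter widths:

# Sanity check (assumption): with filter widths [3, 3, 3, 3, 3] the temporal
# receptive field is 3**5 = 243 frames, matching the "cpn-pt-243.bin" name above.
filter_widths = [3, 3, 3, 3, 3]
receptive_field = 1
for w in filter_widths:
    receptive_field *= w
assert receptive_field == 243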
def create_model():
    # Load the model
    filter_widths = [int(x) for x in args.architecture.split(',')]
    model_eval = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                               dataset.skeleton().num_joints(), filter_widths=filter_widths,
                               causal=args.causal, dropout=args.dropout,
                               channels=args.channels, dense=args.dense)

    receptive_field = model_eval.receptive_field()
    print('INFO: Receptive field: {} frames'.format(receptive_field))
    pad = (receptive_field - 1) // 2  # Padding on each side
    if args.causal:
        print('INFO: Using causal convolutions')
        causal_shift = pad
    else:
        causal_shift = 0

    model_params = 0
    for parameter in model_eval.parameters():
        model_params += parameter.numel()
    print('INFO: Trainable parameter count:', model_params)

    model_eval.to(device)
    return model_eval, causal_shift, pad
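A short usage sketch; poses_valid_2d, dataset, args, and device are assumed to be set up beforehand, as in the neighboring snippets:

# Hypothetical call; the globals used inside create_model() must already exist.
model_eval, causal_shift, pad = create_model()
print('Padding per side:', pad, '| causal shift:', causal_shift)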
def get_pose3d_predictor(ckpt_dir, ckpt_name, filter_widths, causal=False, channels=1024):
    """
    Load the 3D joint-coordinate predictor.

    Args:
        ckpt_dir: directory containing the checkpoint
        ckpt_name: checkpoint file name
        filter_widths: temporal convolution filter widths
        causal: whether to use causal convolutions
        channels: number of convolution channels

    Returns:
        pose3d_predictor
    """
    ckpt_path = os.path.join(ckpt_dir, ckpt_name)
    print('Loading checkpoint', ckpt_path)
    checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
    print('This model was trained for {} epochs'.format(checkpoint['epoch']))

    pose3d_predictor = TemporalModel(17, 2, 17, filter_widths=filter_widths,
                                     causal=causal, channels=channels)
    receptive_field = pose3d_predictor.receptive_field()
    print('INFO: Receptive field: {} frames'.format(receptive_field))

    pose3d_predictor.load_state_dict(checkpoint['model_pos'])
    return pose3d_predictor.to(device).eval()
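A minimal call sketch, assuming a checkpoint laid out like the ones used elsewhere in these snippets; the directory and file name below are placeholders, not paths taken from the function above:

# Hypothetical usage of get_pose3d_predictor(); paths are placeholders.
pose3d_predictor = get_pose3d_predictor(
    ckpt_dir='checkpoint',
    ckpt_name='pretrained_h36m_cpn.bin',
    filter_widths=[3, 3, 3, 3, 3],  # 243-frame receptive field
    causal=False,
    channels=1024,
)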
def videopose_model_load():
    # load trained model
    from common.model import TemporalModel
    chk_filename = main_path + '/checkpoint/cpn-pt-243.bin'
    checkpoint = torch.load(chk_filename,
                            map_location=lambda storage, loc: storage)  # map loc to storage (load on CPU)
    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3], causal=False,
                              dropout=False, channels=1024, dense=False)
    model_pos = model_pos.cuda()
    model_pos.load_state_dict(checkpoint['model_pos'])
    receptive_field = model_pos.receptive_field()
    return model_pos
cameras_valid, poses_valid, poses_valid_2d = fetch(subjects_test, None)

filter_widths = [int(x) for x in args['architecture'].split(',')]
model_pos_train = TemporalModelOptimized1f(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                                           dataset.skeleton().num_joints(),
                                           filter_widths=filter_widths, causal=args['causal'],
                                           dropout=args['dropout'], channels=args['channels'])
model_pos = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                          dataset.skeleton().num_joints(),
                          filter_widths=filter_widths, causal=args['causal'],
                          dropout=args['dropout'], channels=args['channels'],
                          dense=args['dense'])

causal_shift = 0

if torch.cuda.is_available():
    model_pos = model_pos.cuda()
    model_pos_train = model_pos_train.cuda()

if args['resume'] or args['evaluate']:
    chk_filename = os.path.join(
        args['checkpoint'], args['resume'] if args['resume'] else args['evaluate'])
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(chk_filename,
                            map_location=lambda storage, loc: storage)
    elif stride > 1:
        # Downsample as requested
        for i in range(len(out_poses_2d)):
            out_poses_2d[i] = out_poses_2d[i][::stride]
            if out_poses_3d is not None:
                out_poses_3d[i] = out_poses_3d[i][::stride]

    return out_camera_params, out_poses_3d, out_poses_2d

cameras_valid, poses_valid, poses_valid_2d = fetch(['detectron2'], None)

model_pos = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                          dataset.skeleton().num_joints(),
                          filter_widths=[3, 3, 3, 3, 3], causal=False,
                          dropout=0.25, channels=1024, dense=False)

receptive_field = model_pos.receptive_field()
pad = (receptive_field - 1) // 2
causal_shift = 0

if torch.cuda.is_available():
    model_pos = model_pos.cuda()

checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
model_pos.load_state_dict(checkpoint['model_pos'])

test_generator = UnchunkedGenerator(cameras_valid,
def analyze_frame(h, frame):
    boxes, keypoints = infer.inference_on_frame(h['predictor'], frame)

    # step 4: prepare data -- take the 2d keypoints, that's it.
    # the first element is an empty array; the second is our actual frame data: a 3d numpy array
    # with first dimension 1, and the second and third holding the 17 joints of 3 doubles each.
    kp = keypoints[1][0][:2, :].T  # extract (x, y) just like in the prepare_data_2d_custom code
    # what to do if kp is NaN or has missing data? I guess just ignore it.

    # they do this at the end of step 4, but we keep it simple and take the data from step 2 directly into a variable:
    # output[canonical_name]['custom'] = [data[0]['keypoints'].astype('float32')]
    # output_custom_canonical_bullshit = kp.astype('float32')

    # this is what happens at the end of step 4, producing a file that is loaded at the beginning of step 5:
    # np.savez_compressed(os.path.join(args.dataoutputdir, output_prefix_2d + args.output), positions_2d=output, metadata=metadata)

    # this is what the original script does; confusingly, keypoints is actually just data
    # until it is set to keypoints['positions_2d']:
    # keypoints = np.load('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz', allow_pickle=True)

    # step 5: everything else -- starting to copy stuff over from run.py

    # extract dataset from the init dictionary
    dataset = h['dataset']
    keypoints_metadata = h['keypoints_metadata']
    keypoints_symmetry = h['keypoints_symmetry']

    kps_left = h['kps_left']
    kps_right = h['kps_right']
    joints_left = h['joints_left']
    joints_right = h['joints_right']

    # normalize
    for i in range(len(kp)):
        koord = kp[i]
        kp[i] = normalize_screen_coordinates(koord, h['frame_metadata']['w'], h['frame_metadata']['h'])
    # for kps in enumerate(keypoints):
    #     kps[..., :2] = normalize_screen_coordinates(kps[..., :2], frame_metadata['w'], frame_metadata['h'])

    # taken from args.architecture and run.py, just hardcoded here to skip the argument parsing
    filter_widths = [int(x) for x in "3,3,3,3,3".split(',')]
    skeleton_num_joints = dataset.skeleton().num_joints()
    # skeleton_num_joints = 17

    causal = True
    dropout = 0.25
    channels = 1024
    dense = False

    model_pos_train = TemporalModelOptimized1f(kp.shape[-2], kp.shape[-1], skeleton_num_joints,
                                               filter_widths=filter_widths, causal=causal,
                                               dropout=dropout, channels=channels)
    model_pos = TemporalModel(kp.shape[-2], kp.shape[-1], skeleton_num_joints,
                              filter_widths=filter_widths, causal=causal, dropout=dropout,
                              channels=channels, dense=dense)

    receptive_field = model_pos.receptive_field()
    print('INFO: Receptive field: {} frames'.format(receptive_field))
    pad = (receptive_field - 1) // 2  # Padding on each side
    # if args.causal:
    #     print('INFO: Using causal convolutions')
    #     causal_shift = pad
    # else:
    #     causal_shift = 0
    causal_shift = pad

    model_params = 0
    for parameter in model_pos.parameters():
        model_params += parameter.numel()
    print('INFO: Trainable parameter count:', model_params)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()
        model_pos_train = model_pos_train.cuda()

    # if args.resume or args.evaluate:
    if True:
        chk_filename = "checkpoint/pretrained_h36m_detectron_coco.bin"
        print('Loading checkpoint', chk_filename)
        checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
        print('This model was trained for {} epochs'.format(checkpoint['epoch']))
        model_pos_train.load_state_dict(checkpoint['model_pos'])
        model_pos.load_state_dict(checkpoint['model_pos'])

    # false in our particular case...
    # we might benefit from getting rid of model_traj, unless it's super fast --
    # then we should just keep it in case we ever upgrade
    if 'model_traj' in checkpoint:
        # Load trajectory model if it is contained in the checkpoint (e.g. for inference in the wild)
        model_traj = TemporalModel(kp.shape[-2], kp.shape[-1], 1,
                                   filter_widths=filter_widths, causal=causal,
                                   dropout=dropout, channels=channels, dense=dense)
        if torch.cuda.is_available():
            model_traj = model_traj.cuda()
        model_traj.load_state_dict(checkpoint['model_traj'])
    else:
        model_traj = None

    test_generator = UnchunkedGenerator(None, None, kp,
                                        pad=pad, causal_shift=causal_shift, augment=False,
                                        kps_left=kps_left, kps_right=kps_right,
                                        joints_left=joints_left, joints_right=joints_right)
    print('INFO: Testing on {} frames'.format(test_generator.num_frames()))

    # Evaluate
    def evaluate(eval_generator, action=None, return_predictions=False, use_trajectory_model=False):
        epoch_loss_3d_pos = 0
        epoch_loss_3d_pos_procrustes = 0
        epoch_loss_3d_pos_scale = 0
        epoch_loss_3d_vel = 0
        with torch.no_grad():
            if not use_trajectory_model:
                model_pos.eval()
            else:
                model_traj.eval()
            N = 0
            for _, batch, batch_2d in eval_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if torch.cuda.is_available():
                    inputs_2d = inputs_2d.cuda()

                # Positional model
                if not use_trajectory_model:
                    predicted_3d_pos = model_pos(inputs_2d)
                else:
                    predicted_3d_pos = model_traj(inputs_2d)

                # Test-time augmentation (if enabled)
                if eval_generator.augment_enabled():
                    # Undo flipping and take average with non-flipped version
                    predicted_3d_pos[1, :, :, 0] *= -1
                    if not use_trajectory_model:
                        predicted_3d_pos[1, :, joints_left + joints_right] = predicted_3d_pos[1, :, joints_right + joints_left]
                    predicted_3d_pos = torch.mean(predicted_3d_pos, dim=0, keepdim=True)

                if return_predictions:
                    return predicted_3d_pos.squeeze(0).cpu().numpy()

                inputs_3d = torch.from_numpy(batch.astype('float32'))
                if torch.cuda.is_available():
                    inputs_3d = inputs_3d.cuda()
                inputs_3d[:, :, 0] = 0
                if eval_generator.augment_enabled():
                    inputs_3d = inputs_3d[:1]

                error = mpjpe(predicted_3d_pos, inputs_3d)
                epoch_loss_3d_pos_scale += inputs_3d.shape[0]*inputs_3d.shape[1] * n_mpjpe(predicted_3d_pos, inputs_3d).item()
                epoch_loss_3d_pos += inputs_3d.shape[0]*inputs_3d.shape[1] * error.item()
                N += inputs_3d.shape[0] * inputs_3d.shape[1]

                inputs = inputs_3d.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])
                predicted_3d_pos = predicted_3d_pos.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])
                epoch_loss_3d_pos_procrustes += inputs_3d.shape[0]*inputs_3d.shape[1] * p_mpjpe(predicted_3d_pos, inputs)

                # Compute velocity error
                epoch_loss_3d_vel += inputs_3d.shape[0]*inputs_3d.shape[1] * mean_velocity_error(predicted_3d_pos, inputs)

        if action is None:
            print('----------')
        else:
            print('----' + action + '----')
        e1 = (epoch_loss_3d_pos / N)*1000
        e2 = (epoch_loss_3d_pos_procrustes / N)*1000
        e3 = (epoch_loss_3d_pos_scale / N)*1000
        ev = (epoch_loss_3d_vel / N)*1000
        print('Test time augmentation:', eval_generator.augment_enabled())
        print('Protocol #1 Error (MPJPE):', e1, 'mm')
        print('Protocol #2 Error (P-MPJPE):', e2, 'mm')
        print('Protocol #3 Error (N-MPJPE):', e3, 'mm')
        print('Velocity Error (MPJVE):', ev, 'mm')
        print('----------')

        return e1, e2, e3, ev

    image_keypoints2d = kp
    gen = UnchunkedGenerator(None, None, [[image_keypoints2d]],
                             pad=pad, causal_shift=causal_shift, augment=False,
                             kps_left=kps_left, kps_right=kps_right,
                             joints_left=joints_left, joints_right=joints_right)
    prediction = evaluate(gen,
                          return_predictions=True)

    # here is the data format
    # public enum VideoPose3dJointOrder
    # {
    #     HIP = 0,
    #     R_HIP = 1,
    #     R_KNEE = 2,
    #     R_FOOT = 3,
    #     L_HIP = 4,
    #     L_KNEE = 5,
    #     L_FOOT = 6,
    #     SPINE = 7,
    #     THORAX = 8,
    #     NOSE = 9,
    #     HEAD = 10,
    #     L_SHOULDER = 11,
    #     L_ELBOW = 12,
    #     L_WRIST = 13,
    #     R_SHOULDER = 14,
    #     R_ELBOW = 15,
    #     R_WRIST = 16
    # }

    # this bugs out; unclear what was intended here.
    # anyway, we can fix it by just getting width/height some other way.

    # Invert camera transformation
    cam = dataset.cameras()

    width = cam['frame'][0]['res_w']
    height = cam['frame'][0]['res_h']

    image_keypoints2d = image_coordinates(image_keypoints2d[..., :2], w=width, h=height)

    viz_camera = 0

    # If the ground truth is not available, take the camera extrinsic params from a random subject.
    # They are almost the same, and anyway, we only need this for visualization purposes.
    for subject in dataset.cameras():
        if 'orientation' in dataset.cameras()[subject][viz_camera]:
            rot = dataset.cameras()[subject][viz_camera]['orientation']
            break
    prediction = camera_to_world(prediction, R=rot, t=0)

    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])

    # because the algorithm was meant for a list of frames, we take the first frame (our only frame)
    prediction3d = prediction[0]

    return prediction3d, image_keypoints2d

    # do we want to visualize? this code used to write to json and create a video for visualization
    # if args.viz_output is not None:
    if True:
        anim_output = {'Reconstruction': prediction}

        # format the data in the same format as mediapipe, so we can load it in unity with the same script;
        # we need a list (frames) of lists of 3d landmarks.
        unity_landmarks = prediction.tolist()

        # how to send or display the data? maybe draw it on the webcam feed?
        # with open(args.output_json, "w") as json_file:
        #     json.dump(unity_landmarks, json_file)

        # if args.rendervideo == "yes":
        #     from common.visualization import render_animation
        #     render_animation(input_keypoints, keypoints_metadata, anim_output,
        #                      dataset.skeleton(), dataset.fps(), args.viz_bitrate, cam['azimuth'], args.viz_output,
        #                      limit=args.viz_limit, downsample=args.viz_downsample, size=args.viz_size,
        #                      input_video_path=args.viz_video, viewport=(cam['res_w'], cam['res_h']),
        #                      input_video_skip=args.viz_skip)

    we_re_done_here = 1
class Predictor:
    def __init__(self, dataset_path, checkpoint_path, input_video_path=None,
                 export_path=None, output_path=None, with_cuda=False):
        self.with_cuda = with_cuda
        self.dataset_path = dataset_path
        self.export_path = export_path
        self.output_path = output_path
        self.input_video_path = input_video_path
        self.dataset = CustomDataset(self.dataset_path)
        self.keypoints = None
        self.keypoints_left = None
        self.keypoints_right = None
        self.joints_left = None
        self.joints_right = None
        self.checkpoint = torch.load(checkpoint_path,
                                     map_location=lambda storage, loc: storage)
        self.model = None
        self.init_keypoints()
        self.valid_poses = self.keypoints["detectron2"]["custom"]
        self.init_model()
        self.test_generator = None
        self.init_generator()
        self.prediction = None
        self.make_prediction()

    def export_prediction(self):
        if self.export_path is not None:
            np.save(self.export_path, self.prediction)

    def init_model(self):
        self.model = TemporalModel(self.valid_poses[0].shape[-2],
                                   self.valid_poses[0].shape[-1],
                                   self.dataset.skeleton().num_joints(),
                                   filter_widths=[3, 3, 3, 3, 3], causal=False,
                                   dropout=0.25, channels=1024, dense=False)
        self.model.load_state_dict(self.checkpoint['model_pos'])

    def init_keypoints(self):
        self.keypoints = np.load(self.dataset_path, allow_pickle=True)
        keypoints_metadata = self.keypoints['metadata'].item()
        keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
        self.keypoints_left, self.keypoints_right = list(
            keypoints_symmetry[0]), list(keypoints_symmetry[1])
        self.joints_left, self.joints_right = list(
            self.dataset.skeleton().joints_left()), list(
            self.dataset.skeleton().joints_right())
        self.keypoints = self.keypoints['positions_2d'].item()

        for subject in self.keypoints.keys():
            for action in self.keypoints[subject]:
                for cam_idx, kps in enumerate(self.keypoints[subject][action]):
                    # Normalize camera frame
                    cam = self.dataset.cameras()[subject][cam_idx]
                    kps[..., :2] = normalize_screen_coordinates(kps[..., :2],
                                                                w=cam['res_w'], h=cam['res_h'])
                    self.keypoints[subject][action][cam_idx] = kps

    def init_generator(self):
        receptive_field = self.model.receptive_field()
        pad = (receptive_field - 1) // 2
        causal_shift = 0
        self.test_generator = UnchunkedGenerator(
            None, None, self.valid_poses, pad=pad, causal_shift=causal_shift,
            augment=False, kps_left=self.keypoints_left,
            kps_right=self.keypoints_right, joints_left=self.joints_left,
            joints_right=self.joints_right)

    def make_prediction(self):
        if self.with_cuda:
            self.model = self.model.cuda()
        with torch.no_grad():
            self.model.eval()
            for _, batch, batch_2d in self.test_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if self.with_cuda:
                    inputs_2d = inputs_2d.cuda()

                predicted_3d_pos = self.model(inputs_2d)

                if self.test_generator.augment_enabled():
                    predicted_3d_pos[1, :, :, 0] *= -1
                    predicted_3d_pos[1, :, self.joints_left + self.joints_right] = \
                        predicted_3d_pos[1, :, self.joints_right + self.joints_left]
                    predicted_3d_pos = torch.mean(predicted_3d_pos, dim=0, keepdim=True)

                predicted_3d_pos = predicted_3d_pos.squeeze(0).cpu().numpy()
                rot = self.dataset.cameras()['detectron2'][0]['orientation']
                predicted_3d_pos = camera_to_world(predicted_3d_pos, R=rot, t=0)
                predicted_3d_pos[:, :, 2] -= np.min(predicted_3d_pos[:, :, 2])
                self.prediction = predicted_3d_pos

    def plot_pose(self, pose_index=0):
        pose = make_pose(self.prediction.tolist()[pose_index])
        pose.prepare_plot()
        pose.plot()
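For context, instantiating the class might look like the sketch below; the .npz and checkpoint paths are placeholders rather than files referenced above, and the constructor runs the whole pipeline (keypoint loading, model init, prediction) eagerly:

# Hypothetical usage of the Predictor class; both paths are placeholders.
predictor = Predictor(dataset_path='data_2d_custom_video.npz',
                      checkpoint_path='checkpoint/pretrained_h36m_detectron_coco.bin',
                      export_path='output_3d.npy',
                      with_cuda=torch.cuda.is_available())
predictor.export_prediction()  # saves the (N, 17, 3) prediction to export_path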
def videpose_infer(args):
    from common.camera import normalize_screen_coordinates, camera_to_world, image_coordinates
    from common.generators import UnchunkedGenerator
    from common.model import TemporalModel
    from common.utils import Timer, evaluate, add_path
    from videopose import get_detector_2d, ckpt_time, metadata, time0
    import gene_npz

    gene_npz.args.outputpath = str(args.viz_output / "alpha_pose_kunkun_cut")
    print(gene_npz.args)
    # detector_2d = get_detector_2d(args.detector_2d)
    detector_2d = gene_npz.generate_kpts(args.detector_2d)

    assert detector_2d, 'detector_2d should be in ({alpha, hr, open}_pose)'

    # load or generate 2D keypoints
    if not args.input_npz:
        video_name = args.viz_video
        keypoints = detector_2d(video_name)
    else:
        npz = np.load(args.input_npz)
        keypoints = npz['kpts']  # (N, 17, 2)

    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(
        keypoints_symmetry[0]), list(keypoints_symmetry[1])
    joints_left, joints_right = list(
        [4, 5, 6, 11, 12, 13]), list([1, 2, 3, 14, 15, 16])

    # normalize keypoints, assuming fixed camera parameters
    keypoints = normalize_screen_coordinates(
        keypoints[..., :2], w=1000, h=1002)

    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3], causal=args.causal,
                              dropout=args.dropout, channels=args.channels, dense=args.dense)
    if torch.cuda.is_available():
        model_pos = model_pos.cuda()

    ckpt, time1 = ckpt_time(time0)
    print('-------------- load data spends {:.2f} seconds'.format(ckpt))

    # load trained model
    chk_filename = os.path.join(
        args.checkpoint, args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(
        chk_filename, map_location=lambda storage, loc: storage)  # map loc to storage (load on CPU)
    model_pos.load_state_dict(checkpoint['model_pos'])

    ckpt, time2 = ckpt_time(time1)
    print('-------------- load 3D model spends {:.2f} seconds'.format(ckpt))

    # Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    print('Rendering...')
    input_keypoints = keypoints.copy()
    gen = UnchunkedGenerator(None, None, [input_keypoints],
                             pad=pad, causal_shift=causal_shift,
                             augment=args.test_time_augmentation,
                             kps_left=kps_left, kps_right=kps_right,
                             joints_left=joints_left, joints_right=joints_right)
    prediction = evaluate(gen, model_pos, return_predictions=True)

    # save 3D joint points
    np.save(args.viz_output / "test_3d_output.npy", prediction, allow_pickle=True)

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)

    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])
    anim_output = {'Reconstruction': prediction}
    input_keypoints = image_coordinates(
        input_keypoints[..., :2], w=1000, h=1002)

    ckpt, time3 = ckpt_time(time2)
    print(
        '-------------- generate reconstruction 3D data spends {:.2f} seconds'.format(ckpt))

    ckpt, time4 = ckpt_time(time3)
    print('total spend {:.2f} second'.format(ckpt))
def main(input_args):
    vp3d_dir = input_args.vp3d_dir
    sys.path.append(vp3d_dir)

    from common.camera import normalize_screen_coordinates
    from common.model import TemporalModel
    from common.generators import UnchunkedGenerator
    from common.arguments import parse_args

    args = parse_args()
    print(args)

    kps_left = [4, 5, 6, 11, 12, 13]
    kps_right = [1, 2, 3, 14, 15, 16]
    joints_left = [4, 5, 6, 11, 12, 13]
    joints_right = [1, 2, 3, 14, 15, 16]

    filter_widths = [int(x) for x in args.architecture.split(',')]

    num_joints_in = 17
    in_features = 2
    num_joints_out = 17

    model_pos = TemporalModel(num_joints_in, in_features, num_joints_out,
                              filter_widths=filter_widths, causal=args.causal,
                              dropout=args.dropout, channels=args.channels,
                              dense=args.dense)

    receptive_field = model_pos.receptive_field()
    print('INFO: Receptive field: {} frames'.format(receptive_field))
    pad = (receptive_field - 1) // 2  # Padding on each side
    if args.causal:
        print('INFO: Using causal convolutions')
        causal_shift = pad
    else:
        causal_shift = 0

    model_params = 0
    for parameter in model_pos.parameters():
        model_params += parameter.numel()
    print('INFO: Trainable parameter count:', model_params)

    if torch.cuda.is_available():
        model_pos = model_pos.cuda()

    if args.resume or args.evaluate:
        chk_filename = os.path.join(vp3d_dir, args.checkpoint,
                                    args.resume if args.resume else args.evaluate)
        print('Loading checkpoint', chk_filename)
        checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
        print('This model was trained for {} epochs'.format(checkpoint['epoch']))
        model_pos.load_state_dict(checkpoint['model_pos'])

    # Evaluate
    def evaluate(test_generator, action=None, return_predictions=False):
        epoch_loss_3d_pos = 0
        epoch_loss_3d_pos_procrustes = 0
        epoch_loss_3d_pos_scale = 0
        epoch_loss_3d_vel = 0
        with torch.no_grad():
            model_pos.eval()
            N = 0
            for _, batch, batch_2d in test_generator.next_epoch():
                inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
                if torch.cuda.is_available():
                    inputs_2d = inputs_2d.cuda()

                # Positional model
                predicted_3d_pos = model_pos(inputs_2d)

                # Test-time augmentation (if enabled)
                if test_generator.augment_enabled():
                    # Undo flipping and take average with non-flipped version
                    predicted_3d_pos[1, :, :, 0] *= -1
                    predicted_3d_pos[1, :, joints_left + joints_right] = predicted_3d_pos[1, :, joints_right + joints_left]
                    predicted_3d_pos = torch.mean(predicted_3d_pos, dim=0, keepdim=True)

                if return_predictions:
                    return predicted_3d_pos.squeeze(0).cpu().numpy()

                inputs_3d = torch.from_numpy(batch.astype('float32'))
                if torch.cuda.is_available():
                    inputs_3d = inputs_3d.cuda()
                inputs_3d[:, :, 0] = 0
                if test_generator.augment_enabled():
                    inputs_3d = inputs_3d[:1]

                error = mpjpe(predicted_3d_pos, inputs_3d)
                epoch_loss_3d_pos_scale += inputs_3d.shape[0]*inputs_3d.shape[1] * n_mpjpe(predicted_3d_pos, inputs_3d).item()
                epoch_loss_3d_pos += inputs_3d.shape[0]*inputs_3d.shape[1] * error.item()
                N += inputs_3d.shape[0] * inputs_3d.shape[1]

                inputs = inputs_3d.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])
                predicted_3d_pos = predicted_3d_pos.cpu().numpy().reshape(-1, inputs_3d.shape[-2], inputs_3d.shape[-1])
                epoch_loss_3d_pos_procrustes += inputs_3d.shape[0]*inputs_3d.shape[1] * p_mpjpe(predicted_3d_pos, inputs)

                # Compute velocity error
                epoch_loss_3d_vel += inputs_3d.shape[0]*inputs_3d.shape[1] * mean_velocity_error(predicted_3d_pos, inputs)

        if action is None:
            print('----------')
        else:
            print('----' + action + '----')
        e1 = (epoch_loss_3d_pos / N)*1000
        e2 = (epoch_loss_3d_pos_procrustes / N)*1000
        e3 = (epoch_loss_3d_pos_scale /
              N)*1000
        ev = (epoch_loss_3d_vel / N)*1000
        print('Test time augmentation:', test_generator.augment_enabled())
        print('Protocol #1 Error (MPJPE):', e1, 'mm')
        print('Protocol #2 Error (P-MPJPE):', e2, 'mm')
        print('Protocol #3 Error (N-MPJPE):', e3, 'mm')
        print('Velocity Error (MPJVE):', ev, 'mm')
        print('----------')

        return e1, e2, e3, ev

    def get_gt_dirs(input_path, camera_id='dev3'):
        """Get all directories with ground-truth 2D human pose annotations."""
        gt_path_list = []
        category_path_list = get_subdirs(input_path)
        for category in category_path_list:
            if os.path.basename(category) != 'Calibration':
                category_scans = get_subdirs(category)
                for category_scan in category_scans:
                    device_list = get_subdirs(category_scan)
                    for device_path in device_list:
                        if camera_id in device_path:
                            if os.path.exists(os.path.join(device_path, 'pose2d')):  # 2D annotations exist
                                gt_path_list.append(device_path)  # e.g. <root>/Lack_TV_Bench/0007_white_floor_08_04_2019_08_28_10_47/dev3
        return gt_path_list

    def get_subdirs(input_path):
        """
        Get a list of subdirectories in the input_path directory.

        :param input_path: parent directory (in which to get the subdirectories)
        :return: subdirs: list of subdirectories in input_path
        """
        subdirs = [os.path.join(input_path, dir_i)
                   for dir_i in os.listdir(input_path)
                   if os.path.isdir(os.path.join(input_path, dir_i))]
        subdirs.sort()
        return subdirs

    fps = 30
    frame_width = 1920.0
    frame_height = 1080.0

    h36m_joint_names = get_h36m_joint_names()
    h36m_joint_names_dict = {name: i for i, name in enumerate(h36m_joint_names)}
    joint_names = get_body25_joint_names()
    joint_names_dict = {name: i for i, name in enumerate(joint_names)}

    dataset_dir = input_args.dataset_dir
    camera_id = input_args.camera_id

    gt_dirs = get_gt_dirs(dataset_dir, camera_id)
    for i, gt_dir in enumerate(gt_dirs):
        print(f"\nProcessing {i} of {len(gt_dirs)}: {' '.join(gt_dir.split('/')[-3:-1])}")

        input_dir = os.path.join(gt_dir, 'predictions', 'pose2d', 'openpose')
        output_dir = os.path.join(gt_dir, 'predictions', 'pose3d', 'vp3d')
        os.makedirs(output_dir, exist_ok=True)

        json_mask = os.path.join(input_dir, 'scan_video_00000000????_keypoints.json')
        json_files = sorted(glob(json_mask))

        input_keypoints = []
        for json_file in json_files:
            with open(json_file, 'r') as f:
                pose2d = json.load(f)
            if len(pose2d["people"]) == 0:
                keypoints_op = np.zeros((19, 3))
            else:
                keypoints_op = np.array(pose2d["people"][0]["pose_keypoints_2d"]).reshape(-1, 3)  # Takes the first detected person every time...
            keypoints = np.zeros((17, 3))
            for i, joint_name in enumerate(h36m_joint_names):
                if joint_name == 'spine' or joint_name == 'head':
                    continue
                joint_id = joint_names_dict[joint_name]
                keypoints[i, :] = keypoints_op[joint_id, :]
            keypoints[h36m_joint_names_dict['mid hip'], :] = np.mean((keypoints[h36m_joint_names_dict['left hip'], :],
                                                                      keypoints[h36m_joint_names_dict['right hip'], :]), axis=0)  # mid hip = mean(left hip, right hip)
            keypoints[h36m_joint_names_dict['spine'], :] = np.mean((keypoints[h36m_joint_names_dict['neck'], :],
                                                                    keypoints[h36m_joint_names_dict['mid hip'], :]), axis=0)  # spine = mean(neck, mid hip)
            keypoints[h36m_joint_names_dict['head'], :] = np.mean((keypoints_op[joint_names_dict['left ear'], :],
                                                                   keypoints_op[joint_names_dict['right ear'], :]), axis=0)  # head = mean(left ear, right ear)

            input_keypoints.append(keypoints)

        input_keypoints = np.array(input_keypoints)
        input_keypoints = input_keypoints[:, :, :2]

        # For pretrained_h36m_cpn.bin and cpn_ft_h36m_dbb
        input_keypoints[..., :2] = normalize_screen_coordinates(input_keypoints[..., :2],
                                                                w=frame_width, h=frame_height)

        args.test_time_augmentation = True
        gen = UnchunkedGenerator(None, None, [input_keypoints],
                                 pad=pad, causal_shift=causal_shift,
                                 augment=args.test_time_augmentation,
                                 kps_left=kps_left, kps_right=kps_right,
                                 joints_left=joints_left, joints_right=joints_right)
        prediction = evaluate(gen, return_predictions=True)  # Nx17x3

        pickle.dump(prediction, open(os.path.join(output_dir, 'vp3d_output.pkl'), "wb"))