Example #1
class VIBE_Demo(nn.Module):
    def __init__(
            self,
            seqlen,
            batch_size=64,
            n_layers=1,
            hidden_size=2048,
            add_linear=False,
            bidirectional=False,
            use_residual=True,
            pretrained=osp.join(VIBE_DATA_DIR,
                                'spin_model_checkpoint.pth.tar'),
    ):

        super(VIBE_Demo, self).__init__()

        self.seqlen = seqlen
        self.batch_size = batch_size

        self.encoder = TemporalEncoder(
            n_layers=n_layers,
            hidden_size=hidden_size,
            bidirectional=bidirectional,
            add_linear=add_linear,
            use_residual=use_residual,
        )

        self.hmr = hmr()
        checkpoint = torch.load(pretrained)
        self.hmr.load_state_dict(checkpoint['model'], strict=False)

        # regressor can predict cam, pose and shape params in an iterative way
        self.regressor = Regressor()

        if pretrained and os.path.isfile(pretrained):
            pretrained_dict = torch.load(pretrained)['model']

            self.regressor.load_state_dict(pretrained_dict, strict=False)
            print(f'=> loaded pretrained model from \'{pretrained}\'')

    def forward(self, input, J_regressor=None):
        # input shape: (batch, seqlen, channels, height, width)
        batch_size, seqlen, nc, h, w = input.shape

        feature = self.hmr.feature_extractor(input.reshape(-1, nc, h, w))

        feature = feature.reshape(batch_size, seqlen, -1)
        feature = self.encoder(feature)
        feature = feature.reshape(-1, feature.size(-1))

        smpl_output = self.regressor(feature, J_regressor=J_regressor)

        for s in smpl_output:
            s['theta'] = s['theta'].reshape(batch_size, seqlen, -1)
            s['verts'] = s['verts'].reshape(batch_size, seqlen, -1, 3)
            s['kp_2d'] = s['kp_2d'].reshape(batch_size, seqlen, -1, 2)
            s['kp_3d'] = s['kp_3d'].reshape(batch_size, seqlen, -1, 3)
            s['rotmat'] = s['rotmat'].reshape(batch_size, seqlen, -1, 3, 3)

        return smpl_output
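
A minimal usage sketch (not part of the source): it assumes the default SPIN checkpoint is available under VIBE_DATA_DIR and that the HMR backbone takes 224x224 crops; the tensor shapes follow the forward pass above.

model = VIBE_Demo(seqlen=16).eval()          # loads the SPIN checkpoint in __init__

with torch.no_grad():
    clip = torch.randn(1, 16, 3, 224, 224)   # (batch, seqlen, channels, height, width)
    smpl_output = model(clip)                # list of dicts from the iterative regressor
    print(smpl_output[-1]['verts'].shape)    # (1, 16, num_vertices, 3)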
Example #2
def get_regressor_output(features):
    from lib.models.spin import Regressor

    batch_size, seqlen = features.shape[:2]

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = Regressor().to(device)

    smpl = SMPL(SMPL_MODEL_DIR).to(device)
    pretrained = torch.load('models/model_best.pth.tar',
                            map_location=torch.device('cpu'))['gen_state_dict']

    # keep only the regressor weights; k[10:] strips the 'regressor.' prefix
    new_pretrained_dict = {}
    for k, v in pretrained.items():
        if 'regressor' in k:
            new_pretrained_dict[k[10:]] = v
            # drop mean_theta from the checkpoint (its batch dimension may
            # not match) so the freshly built Regressor keeps its own buffer
            if 'mean_theta' in k:
                del new_pretrained_dict[k[10:]]

    model.load_state_dict(new_pretrained_dict, strict=False)
    features = features.reshape(batch_size * seqlen, -1)
    features = features.to(device)
    # take the last iteration's estimate and split theta into
    # cam (3), pose (72, axis-angle) and shape (10) parameters
    theta = model(features)[-1]

    cam = theta[:, 0:3].contiguous()
    pose = theta[:, 3:75].contiguous()
    shape = theta[:, 75:].contiguous()

    pred_output = smpl(betas=shape,
                       body_pose=pose[:, 3:],
                       global_orient=pose[:, :3],
                       pose2rot=True)
    verts = pred_output.vertices

    verts = verts.reshape(batch_size, seqlen, -1, 3)
    cam = cam.reshape(batch_size, seqlen, -1)

    return verts, cam
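
A usage sketch (assumption, not from the source): the 2048-d per-frame features and the checkpoint at models/model_best.pth.tar are assumptions about the surrounding setup.

features = torch.randn(2, 16, 2048)          # (batch, seqlen, feature_dim) per-frame features
verts, cam = get_regressor_output(features)
print(verts.shape)                           # (2, 16, num_vertices, 3)
print(cam.shape)                             # (2, 16, 3) weak-perspective camera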
Example #3
class VIBE_Demo(nn.Module):
    def __init__(self,
                 seqlen,
                 batch_size=64,
                 n_layers=1,
                 hidden_size=2048,
                 pretrained='data/vibe_data/spin_model_checkpoint.pth.tar',
                 add_linear=False,
                 bidirectional=False,
                 attention=False,
                 attention_cfg=None,
                 use_residual=True,
                 disable_temporal=False):

        super(VIBE_Demo, self).__init__()

        self.seqlen = seqlen
        self.batch_size = batch_size
        self.disable_temporal = disable_temporal

        if attention:
            cfg = attention_cfg
            self.encoder = TemporalEncoderWAttention(
                hidden_size=hidden_size,
                bidirectional=bidirectional,
                add_linear=add_linear,
                attention_size=cfg.SIZE,
                attention_layers=cfg.LAYERS,
                attention_dropout=cfg.DROPOUT,
                use_residual=use_residual,
            )
        else:
            self.encoder = TemporalEncoder(
                n_layers=n_layers,
                hidden_size=hidden_size,
                bidirectional=bidirectional,
                add_linear=add_linear,
                use_residual=use_residual,
            )

        self.hmr = hmr()
        if torch.cuda.is_available():
            checkpoint = torch.load(pretrained)
        else:
            checkpoint = torch.load(pretrained,
                                    map_location=torch.device('cpu'))

        self.hmr.load_state_dict(checkpoint['model'], strict=False)

        # regressor can predict cam, pose and shape params in an iterative way
        self.regressor = Regressor()

        if pretrained and os.path.isfile(pretrained):
            if torch.cuda.is_available():
                pretrained_dict = torch.load(pretrained)['model']
            else:
                pretrained_dict = torch.load(
                    pretrained, map_location=torch.device('cpu'))['model']

            self.regressor.load_state_dict(pretrained_dict, strict=False)
            print(f'=> loaded pretrained model from \'{pretrained}\'')

    def forward(self, input, J_regressor=None):
        # input shape: (batch, seqlen, channels, height, width)
        batch_size, seqlen, nc, h, w = input.shape

        feature = self.hmr.feature_extractor(input.reshape(-1, nc, h, w))

        if not self.disable_temporal:
            feature = feature.reshape(batch_size, seqlen, -1)
            feature = self.encoder(feature)
            feature = feature.reshape(-1, feature.size(-1))

        smpl_output = self.regressor(feature, J_regressor=J_regressor)

        for s in smpl_output:
            s['theta'] = s['theta'].reshape(batch_size, seqlen, -1)
            s['verts'] = s['verts'].reshape(batch_size, seqlen, -1, 3)
            s['kp_2d'] = s['kp_2d'].reshape(batch_size, seqlen, -1, 2)
            s['kp_3d'] = s['kp_3d'].reshape(batch_size, seqlen, -1, 3)
            s['rotmat'] = s['rotmat'].reshape(batch_size, seqlen, -1, 3, 3)

        return smpl_output
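
A sketch (assumption) of the disable_temporal flag introduced in this variant: it bypasses the temporal encoder so each frame's HMR feature goes straight to the regressor, which can be used to ablate the GRU/attention stage.

baseline = VIBE_Demo(seqlen=16, disable_temporal=True).eval()

with torch.no_grad():
    clip = torch.randn(1, 16, 3, 224, 224)
    out = baseline(clip)                     # same output structure, no temporal smoothing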
Example #4
class VIBE(nn.Module):
    def __init__(self,
                 seqlen,
                 batch_size=64,
                 n_layers=1,
                 hidden_size=2048,
                 pretrained='data/vibe_data/spin_model_checkpoint.pth.tar',
                 add_linear=False,
                 bidirectional=False,
                 attention=False,
                 attention_cfg=None,
                 use_residual=True,
                 use_6d=True,
                 disable_temporal=False):

        super(VIBE, self).__init__()

        self.seqlen = seqlen
        self.batch_size = batch_size
        self.disable_temporal = disable_temporal

        if attention:
            cfg = attention_cfg
            self.encoder = TemporalEncoderWAttention(
                hidden_size=hidden_size,
                bidirectional=bidirectional,
                add_linear=add_linear,
                attention_size=cfg.SIZE,
                attention_layers=cfg.LAYERS,
                attention_dropout=cfg.DROPOUT,
                use_residual=use_residual,
            )
        else:
            self.encoder = TemporalEncoder(
                n_layers=n_layers,
                hidden_size=hidden_size,
                bidirectional=bidirectional,
                add_linear=add_linear,
                use_residual=use_residual,
            )

        # regressor can predict cam, pose and shape params in an iterative way
        self.regressor = Regressor(use_6d=use_6d)

        if pretrained and os.path.isfile(pretrained):
            pretrained_dict = torch.load(pretrained)['model']

            if not use_6d:
                del pretrained_dict['decpose.weight']
                del pretrained_dict['decpose.bias']
                del pretrained_dict['fc1.weight']
                del pretrained_dict['fc1.bias']

            self.regressor.load_state_dict(pretrained_dict, strict=False)
            print(f'=> loaded pretrained model from \'{pretrained}\'')

    def forward(self, input, J_regressor=None):
        # input shape: (batch, seqlen, feature_dim)
        batch_size, seqlen = input.shape[:2]

        if self.disable_temporal:
            feature = input.reshape(-1, input.size(-1))
        else:
            feature = self.encoder(input)
            feature = feature.reshape(-1, feature.size(-1))

        smpl_output = self.regressor(feature, J_regressor=J_regressor)
        for s in smpl_output:
            s['theta'] = s['theta'].reshape(batch_size, seqlen, -1)
            s['verts'] = s['verts'].reshape(batch_size, seqlen, -1, 3)
            s['kp_2d'] = s['kp_2d'].reshape(batch_size, seqlen, -1, 2)
            s['kp_3d'] = s['kp_3d'].reshape(batch_size, seqlen, -1, 3)
            s['rotmat'] = s['rotmat'].reshape(batch_size, seqlen, -1, 3, 3)

        return smpl_output
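
Usage sketch for this VIBE variant (assumption, not from the source): unlike VIBE_Demo it consumes precomputed per-frame features of shape (batch, seqlen, feature_dim) instead of raw frames; the 2048 feature size is assumed to match the SPIN backbone.

vibe = VIBE(seqlen=16)

features = torch.randn(4, 16, 2048)          # (batch, seqlen, feature_dim)
smpl_output = vibe(features)
print(smpl_output[-1]['kp_3d'].shape)        # (4, 16, num_joints, 3)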
class TCMR(nn.Module):
    def __init__(
            self,
            seqlen,
            batch_size=64,
            n_layers=1,
            hidden_size=2048,
            pretrained=osp.join(BASE_DATA_DIR,
                                'spin_model_checkpoint.pth.tar'),
    ):

        super(TCMR, self).__init__()

        self.seqlen = seqlen
        self.batch_size = batch_size

        self.encoder = TemporalEncoder(
            seq_len=seqlen,
            n_layers=n_layers,
            hidden_size=hidden_size,
        )

        # regressor can predict cam, pose and shape params in an iterative way
        self.regressor = Regressor()

        if pretrained and os.path.isfile(pretrained):
            pretrained_dict = torch.load(pretrained)['model']

            self.regressor.load_state_dict(pretrained_dict, strict=False)
            print(f'=> loaded pretrained model from \'{pretrained}\'')

    def forward(self, input, is_train=False, J_regressor=None):
        # input shape: (batch, seqlen, feature_dim)
        batch_size, seqlen = input.shape[:2]

        feature, scores = self.encoder(input, is_train=is_train)
        feature = feature.reshape(-1, feature.size(-1))

        smpl_output = self.regressor(feature,
                                     is_train=is_train,
                                     J_regressor=J_regressor)

        if not is_train:
            for s in smpl_output:
                s['theta'] = s['theta'].reshape(batch_size, -1)
                s['verts'] = s['verts'].reshape(batch_size, -1, 3)
                s['kp_2d'] = s['kp_2d'].reshape(batch_size, -1, 2)
                s['kp_3d'] = s['kp_3d'].reshape(batch_size, -1, 3)
                s['rotmat'] = s['rotmat'].reshape(batch_size, -1, 3, 3)
                s['scores'] = scores

        else:
            repeat_num = 3
            for s in smpl_output:
                s['theta'] = s['theta'].reshape(batch_size, repeat_num, -1)
                s['verts'] = s['verts'].reshape(batch_size, repeat_num, -1, 3)
                s['kp_2d'] = s['kp_2d'].reshape(batch_size, repeat_num, -1, 2)
                s['kp_3d'] = s['kp_3d'].reshape(batch_size, repeat_num, -1, 3)
                s['rotmat'] = s['rotmat'].reshape(batch_size, repeat_num, -1,
                                                  3, 3)
                s['scores'] = scores

        return smpl_output, scores
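
Usage sketch for TCMR (assumption, not from the source): it also takes precomputed per-frame features; in eval mode the encoder condenses each sequence into a single target-frame prediction, so the outputs carry no seqlen dimension, while training mode keeps repeat_num=3 predictions per sequence. The 2048 feature size and the checkpoint under BASE_DATA_DIR are assumptions about the surrounding repo.

tcmr = TCMR(seqlen=16).eval()

with torch.no_grad():
    features = torch.randn(4, 16, 2048)      # (batch, seqlen, feature_dim)
    smpl_output, scores = tcmr(features, is_train=False)
print(smpl_output[-1]['verts'].shape)        # (4, num_vertices, 3), one frame per sequence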