def fkl_torch(angles, parent, offset, rotInd, expmapInd):
    """
    Batched PyTorch forward kinematics: convert joint angles to joint
    locations.

    :param angles: N*99 tensor; the first three columns are dropped and the
        rest is read as consecutive 3-vectors that are handed to
        ``data_utils.expmap2rotmat_torch``
    :param parent: per-joint parent index, indexed by joint id
    :param offset: numpy array holding one 3-vector offset per joint;
        ``offset.shape[0]`` is used as the joint count
    :param rotInd: unused here, kept for signature compatibility
    :param expmapInd: unused here, kept for signature compatibility
    :return: N*joint_n*3 tensor, on the same device as the rotation matrices
        produced by ``data_utils.expmap2rotmat_torch``
    """
    n = angles.shape[0]
    j_n = offset.shape[0]
    angles = angles[:, 3:].contiguous().view(-1, 3)
    R = data_utils.expmap2rotmat_torch(angles).view(n, j_n, 3, 3)

    # Put the offsets on the device of the rotations they are multiplied
    # with, instead of "CUDA whenever it is available"; the latter mixes
    # devices as soon as the rotations live on the CPU of a CUDA machine.
    p3d = torch.from_numpy(offset).float().to(R.device)
    p3d = p3d.unsqueeze(0).repeat(n, 1, 1)

    for i in range(1, j_n):
        # NOTE(review): only joints whose parent index is > 0 are chained;
        # joints attached to index 0 keep their raw offset and local
        # rotation. This mirrors the original code -- confirm it is intended.
        if parent[i] > 0:
            # Compose the joint rotation with its parent's (already composed)
            # rotation; R is updated in place so descendants see the result.
            R[:, i, :, :] = torch.matmul(R[:, i, :, :],
                                         R[:, parent[i], :, :]).clone()
            # p3d[0, i] still holds the untouched offset of joint i (every
            # batch row starts as the same copy); rotate it by the parent's
            # rotation and shift it by the parent's position.
            p3d[:, i, :] = torch.matmul(
                p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :]
    return p3d
示例#2
0
def euler_error(outputs, all_seq, input_n, dim_used, N):
    """
    Mean Euler-angle error of a prediction given as Chebyshev coefficients.

    The coefficients are expanded to per-frame values with a Chebyshev
    Vandermonde matrix, written into a copy of ``all_seq`` at the
    ``dim_used`` columns, and compared with ``all_seq`` on the frames from
    ``input_n`` onwards after both are converted to Euler angles.

    :param outputs: predicted coefficients; reshaped internally to (-1, N)
    :param all_seq: ground-truth tensor of shape (n, seq_len, dim_full_len)
    :param input_n: number of leading frames excluded from the error
    :param dim_used: indices along the last axis of ``all_seq`` that the
        prediction overwrites
    :param N: number of Chebyshev coefficients per trajectory (presumably
        equal to seq_len -- the reshape below relies on it; TODO confirm)
    :return: scalar tensor, the mean over rows of the L2 norm of the
        Euler-angle difference
    """
    _, seq_len, dim_full_len = all_seq.data.shape
    dim_used_len = len(dim_used)

    def _to_euler(expmap):
        # expmap -> rotation matrix -> Euler angles, one row per frame.
        rotmat = data_utils.expmap2rotmat_torch(expmap)
        return data_utils.rotmat2euler_torch(rotmat).view(-1, dim_full_len)

    t = np.arange(1, N + 1, 1)
    A = chebyshev.chebvander(t, N - 1)
    # Keep the basis on the device of the coefficients instead of forcing
    # CUDA, so the function also runs on CPU-only machines.
    A = torch.from_numpy(A).float().to(outputs.device)
    outputs_t = torch.mm(A, outputs.view(-1, N).transpose(0, 1))
    outputs_exp = outputs_t.transpose(0, 1).view(-1, dim_used_len,
                                                 seq_len).transpose(1, 2)

    pred_expmap = all_seq.clone()
    dim_used = np.array(dim_used)
    pred_expmap[:, :, dim_used] = outputs_exp

    # NOTE: columns 0:6 are deliberately left untouched here (the zeroing
    # was commented out in the original implementation).
    pred_expmap = pred_expmap[:, input_n:, :].contiguous().view(-1, 3)
    targ_expmap = all_seq[:, input_n:, :].clone().contiguous().view(-1, 3)

    pred_eul = _to_euler(pred_expmap)
    targ_eul = _to_euler(targ_expmap)
    mean_errors = torch.mean(torch.norm(pred_eul - targ_eul, 2, 1))

    return mean_errors
示例#3
0
def euler_error(outputs, all_seq, input_n, dim_used, dct_n):
    """
    Mean Euler-angle error of a prediction given as DCT coefficients.

    The coefficients are mapped back to per-frame values with the first
    ``dct_n`` columns of the inverse DCT matrix, written into a copy of
    ``all_seq`` at the ``dim_used`` columns, and compared with ``all_seq`` on
    the frames from ``input_n`` onwards after conversion to Euler angles.

    :param outputs: predicted DCT coefficients; reshaped to (-1, dct_n)
    :param all_seq: ground-truth tensor of shape (n, seq_len, dim_full_len)
    :param input_n: number of leading frames excluded from the error
    :param dim_used: indices along the last axis of ``all_seq`` that the
        prediction overwrites
    :param dct_n: number of DCT coefficients kept per trajectory
    :return: scalar tensor, the mean over rows of the L2 norm of the
        Euler-angle difference
    """
    _, seq_len, dim_full_len = all_seq.data.shape
    dim_used_len = len(dim_used)

    def _to_euler(expmap):
        # expmap -> rotation matrix -> Euler angles, one row per frame.
        rotmat = data_utils.expmap2rotmat_torch(expmap)
        return data_utils.rotmat2euler_torch(rotmat).view(-1, dim_full_len)

    _, idct_m = data_utils.get_dct_matrix(seq_len)
    # Follow the device of the coefficients instead of forcing CUDA, so the
    # function also runs on CPU-only machines.
    idct_m = torch.from_numpy(idct_m).float().to(outputs.device)
    outputs_t = outputs.view(-1, dct_n).transpose(0, 1)
    outputs_exp = torch.matmul(idct_m[:, :dct_n], outputs_t)
    outputs_exp = outputs_exp.transpose(0, 1).contiguous().view(
        -1, dim_used_len, seq_len).transpose(1, 2)

    pred_expmap = all_seq.clone()
    dim_used = np.array(dim_used)
    pred_expmap[:, :, dim_used] = outputs_exp

    # NOTE: columns 0:6 are deliberately left untouched here (the zeroing
    # was commented out in the original implementation).
    pred_expmap = pred_expmap[:, input_n:, :].contiguous().view(-1, 3)
    targ_expmap = all_seq[:, input_n:, :].clone().contiguous().view(-1, 3)

    pred_eul = _to_euler(pred_expmap)
    targ_eul = _to_euler(targ_expmap)
    mean_errors = torch.mean(torch.norm(pred_eul - targ_eul, 2, 1))

    return mean_errors
示例#4
0
def run_model(net_pred,
              optimizer=None,
              is_train=0,
              data_loader=None,
              epo=1,
              opt=None):
    """
    Run ``net_pred`` once over ``data_loader`` and collect angle errors.

    ``is_train`` selects the mode:

    * ``0`` -- training: the network is put in train mode, an L1 loss is
      back-propagated with gradient clipping, and batches of size 1 are
      skipped;
    * ``1`` -- the network is put in eval mode and a single mean Euler-angle
      error is reported;
    * anything larger -- eval mode, one Euler-angle error per output frame.

    :param net_pred: network called as
        ``net_pred(src, output_n=..., itera=..., input_n=...)``
    :param optimizer: optimizer stepped when ``is_train == 0``
    :param is_train: mode selector, see above
    :param data_loader: iterable yielding tensors of shape (batch, seq, dims)
    :param epo: unused here, kept for signature compatibility
    :param opt: options object; ``input_n``, ``output_n``, ``kernel_size``
        and (when training) ``max_norm`` are read from it
    :return: dict with ``"l_ang"`` (training only) and ``"m_ang_h36"`` when
        ``is_train <= 1``, otherwise keys ``"#1"`` .. ``"#<output_n>"`` with
        the per-frame errors
    """
    # Imported locally, as in the original, but once instead of per batch.
    import utils.data_utils as data_utils

    training = is_train == 0
    per_frame = is_train > 1

    if training:
        net_pred.train()
    else:
        net_pred.eval()

    in_n = opt.input_n
    out_n = opt.output_n
    seq_in = opt.kernel_size
    itera = 1
    dim_used = np.array([
        6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38,
        39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61,
        62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86
    ])

    def _expmap_to_euler(expmap):
        # Flatten to one 3-vector per row and convert expmap -> rotation
        # matrix -> Euler angles.
        flat = expmap.reshape([-1, 99]).reshape([-1, 3])
        return data_utils.rotmat2euler_torch(
            data_utils.expmap2rotmat_torch(flat))

    l_ang = 0
    m_ang_seq = np.zeros([out_n]) if per_frame else 0
    n = 0
    st = time.time()
    for i, ang_h36 in enumerate(data_loader):
        batch_size, _, _ = ang_h36.shape
        # when only one sample in this batch
        if batch_size == 1 and training:
            continue
        n += batch_size
        bt = time.time()
        ang_h36 = ang_h36.float().cuda()
        # Supervision target: the last (out_n + seq_in) frames of the used
        # dimensions. Index-array selection already yields a copy.
        ang_sup = ang_h36[:, :, dim_used][:, -out_n - seq_in:]
        ang_src = ang_h36[:, :, dim_used]
        ang_out_all = net_pred(ang_src,
                               output_n=out_n,
                               itera=itera,
                               input_n=in_n)

        # Start from the ground-truth future frames and overwrite only the
        # predicted dimensions.
        ang_out = ang_h36[:, in_n:in_n + out_n].clone()
        ang_out[:, :, dim_used] = ang_out_all[:, seq_in:, 0]

        grad_norm = 0
        if training:
            loss_ang = torch.mean(
                torch.sum(torch.abs(ang_out_all[:, :, 0] - ang_sup), dim=2))
            optimizer.zero_grad()
            loss_ang.backward()
            grad_norm = nn.utils.clip_grad_norm_(list(net_pred.parameters()),
                                                 max_norm=opt.max_norm)
            optimizer.step()

            # update log values
            l_ang += loss_ang.cpu().data.numpy() * batch_size

        with torch.no_grad():
            ang_out_euler = _expmap_to_euler(ang_out)
            ang_gt_euler = _expmap_to_euler(ang_h36[:, in_n:in_n + out_n])
            if per_frame:
                # Keep the frame axis so errors are summed per output frame.
                ang_out_euler = ang_out_euler.view(-1, out_n, 99)
                ang_gt_euler = ang_gt_euler.view(-1, out_n, 99)
                eulererr_ang_seq = torch.sum(
                    torch.norm(ang_out_euler - ang_gt_euler, dim=2), dim=0)
            else:
                # Training / validation: one overall mean error.
                ang_out_euler = ang_out_euler.view(-1, 99)
                ang_gt_euler = ang_gt_euler.view(-1, 99)
                eulererr_ang_seq = torch.mean(
                    torch.norm(ang_out_euler - ang_gt_euler, dim=1))

        if per_frame:
            m_ang_seq += eulererr_ang_seq.cpu().data.numpy()
        else:
            # Weight the batch mean by its size so the final division by n
            # gives a per-sample average.
            m_ang_seq += eulererr_ang_seq.cpu().data.numpy() * batch_size

        if i % 1000 == 0:
            print('{}/{}|bt {:.3f}s|tt{:.0f}s|gn{}'.format(
                i + 1, len(data_loader),
                time.time() - bt,
                time.time() - st, grad_norm))

    ret = {}
    if training:
        ret["l_ang"] = l_ang / n

    if per_frame:
        m_ang_h36 = m_ang_seq / n
        for j in range(out_n):
            # Keys are 1-based frame numbers.
            ret["#{:d}".format(j + 1)] = m_ang_h36[j]
    else:
        ret["m_ang_h36"] = m_ang_seq / n
    return ret
示例#5
0
    def test(self, train_loader, dataset='h3.6m', input_n=20, output_n=50, dct_n=20, cartesian=False,
             dim_used=[]):
        """
        Evaluate ``self.model`` on ``train_loader`` at a fixed set of future frames.

        The model output is treated as DCT coefficients, mapped back to the time domain and written into a
        copy of the ground-truth sequence at the ``dim_used`` columns. In expmap mode (``cartesian`` false)
        an Euler-angle error and a 3D-position error are accumulated; in cartesian mode both returned arrays
        hold the same 3D error.

        :param train_loader: iterable yielding ``(inputs, targets, all_seq)`` batches
        :param dataset: ``'h3.6m'`` or ``'cmu_mocap'``; selects the expmap-to-xyz conversion (expmap mode only)
        :param input_n: number of leading frames excluded from the evaluation
        :param output_n: number of predicted frames; only ``>= 25`` and ``10`` have an evaluation-frame table
        :param dct_n: number of DCT coefficients per trajectory
        :param cartesian: if true, sequences are treated as 3D coordinates instead of expmaps
        :param dim_used: indices along the last axis of ``all_seq`` that the prediction overwrites
            (the default list is never mutated)
        :return: tuple ``(t_e, t_3d)`` of numpy arrays, one entry per evaluated frame, averaged over samples
        :raises ValueError: if ``output_n`` has no evaluation-frame table, or ``dataset`` is unknown in
            expmap mode
        """
        if output_n >= 25:
            eval_frame = [1, 3, 7, 9, 13, 24]
        elif output_n == 10:
            eval_frame = [1, 3, 7, 9]
        else:
            # Previously this surfaced later as an UnboundLocalError on eval_frame.
            raise ValueError('unsupported output_n for evaluation: {}'.format(output_n))

        expmap2xyz = None
        if not cartesian:
            if dataset == 'h3.6m':
                expmap2xyz = data_utils.expmap2xyz_torch
            elif dataset == 'cmu_mocap':
                expmap2xyz = data_utils.expmap2xyz_torch_cmu
            else:
                # Previously this surfaced as an UnboundLocalError on targ_p3d.
                raise ValueError('unknown dataset: {}'.format(dataset))

        def _mean_joint_dist(targ_p3d, pred_p3d, frame):
            # Mean Euclidean distance over all joints of all samples at one frame, as a Python float.
            diff = targ_p3d[:, frame, :, :].contiguous().view(-1, 3) - \
                pred_p3d[:, frame, :, :].contiguous().view(-1, 3)
            return torch.mean(torch.norm(diff, 2, 1)).item()

        # Joints that share a position with another joint: the "ignored" coordinates are copied from
        # their "equal" counterparts after the prediction is written (cartesian mode only).
        joint_to_ignore = np.array([16, 20, 29, 24, 27, 33, 36])
        index_to_ignore = np.concatenate(
            (joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
        joint_equal = np.array([15, 15, 15, 23, 23, 32, 32])
        index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2))

        dim_used = np.array(dim_used)
        dim_used_len = len(dim_used)

        N = 0
        t_e = np.zeros(len(eval_frame))
        t_3d = np.zeros(len(eval_frame))

        self.model.eval()
        st = time.time()
        bar = Bar('>>>', fill='>', max=len(train_loader))
        for i, (inputs, targets, all_seq) in enumerate(train_loader):
            bt = time.time()

            if self.is_cuda:
                inputs = inputs.cuda().float()
                all_seq = all_seq.cuda(non_blocking=True).float()

            outputs, _, _, _ = self.model(inputs.float())

            n, seq_len, dim_full_len = all_seq.data.shape
            all_seq[:, :, 0:6] = 0

            # inverse dct transformation; the matrix follows the device of the model output instead of
            # being forced onto CUDA, so self.is_cuda=False no longer mixes devices here
            _, idct_m = data_utils.get_dct_matrix(seq_len)
            idct_m = torch.from_numpy(idct_m).float().to(outputs.device)
            outputs_t = outputs.view(-1, dct_n).transpose(0, 1)
            outputs_seq = torch.matmul(idct_m[:, :dct_n], outputs_t).transpose(0, 1).contiguous().view(
                -1, dim_used_len, seq_len).transpose(1, 2)

            if not cartesian:
                pred_expmap = all_seq.clone()
                pred_expmap[:, :, dim_used] = outputs_seq
                pred_expmap = pred_expmap[:, input_n:, :].contiguous().view(-1, dim_full_len)
                targ_expmap = all_seq[:, input_n:, :].clone().contiguous().view(-1, dim_full_len)

                pred_expmap[:, 0:6] = 0
                targ_expmap[:, 0:6] = 0
                pred_expmap = pred_expmap.view(-1, 3)
                targ_expmap = targ_expmap.view(-1, 3)

                # get euler angles from expmap
                pred_eul = data_utils.rotmat2euler_torch(data_utils.expmap2rotmat_torch(pred_expmap))
                pred_eul = pred_eul.view(-1, dim_full_len).view(-1, output_n, dim_full_len)
                targ_eul = data_utils.rotmat2euler_torch(data_utils.expmap2rotmat_torch(targ_expmap))
                targ_eul = targ_eul.view(-1, dim_full_len).view(-1, output_n, dim_full_len)

                # get 3d coordinates
                targ_p3d = expmap2xyz(targ_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)
                pred_p3d = expmap2xyz(pred_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)

                for k, j in enumerate(eval_frame):
                    t_e[k] += torch.mean(torch.norm(pred_eul[:, j, :] - targ_eul[:, j, :], 2, 1)).item() * n
                    t_3d[k] += _mean_joint_dist(targ_p3d, pred_p3d, j) * n
            else:
                pred_3d = all_seq.clone()
                pred_3d[:, :, dim_used] = outputs_seq
                # deal with joints at same position
                pred_3d[:, :, index_to_ignore] = pred_3d[:, :, index_to_equal]
                pred_p3d = pred_3d.contiguous().view(n, seq_len, -1, 3)[:, input_n:, :, :]
                targ_p3d = all_seq.contiguous().view(n, seq_len, -1, 3)[:, input_n:, :, :]
                for k, j in enumerate(eval_frame):
                    # .item() replaces `.numpy()[0]`, which raises IndexError on the 0-dim result of
                    # torch.mean. Both accumulators receive the same 3D error in this mode.
                    err = _mean_joint_dist(targ_p3d, pred_p3d, j) * n
                    t_e[k] += err
                    t_3d[k] += err

            N += n

            bar.suffix = '{}/{}|batch time {:.4f}s|total time{:.2f}s'.format(i + 1, len(train_loader), time.time() - bt,
                                                                             time.time() - st)
            bar.next()
        bar.finish()
        return t_e / N, t_3d / N
示例#6
0
def test(train_loader,
         model,
         stepsize,
         input_n=20,
         output_n=50,
         dct_n=20,
         is_cuda=False,
         dim_used=[]):
    """
    Evaluate ``model`` auto-regressively and report errors at fixed frames.

    For every batch the model is applied ``int(output_n / stepsize)`` times.
    Each call receives the DCT of a ``dct_n``-frame window and returns DCT
    coefficients that are mapped back to the time domain; the windows are
    stitched into one prediction that overwrites the ``dim_used`` columns of
    a copy of the ground truth.

    :param train_loader: iterable yielding ``(inputs, targets, all_seq)``
    :param model: network mapping DCT coefficients to DCT coefficients
    :param stepsize: number of frames the input window advances per call
    :param input_n: number of leading frames excluded from the evaluation
    :param output_n: number of predicted frames; must be 10, 25, 50 or 100
    :param dct_n: window length / number of DCT coefficients
    :param is_cuda: move the batches to CUDA before evaluation
    :param dim_used: indices along the last axis of ``all_seq`` that the
        prediction overwrites (the default list is never mutated)
    :return: tuple ``(t_e, t_3d)`` of numpy arrays with the Euler-angle and
        3D errors per evaluated frame, averaged over all samples
    :raises ValueError: if ``output_n`` has no evaluation-frame table
    """
    eval_frames_by_horizon = {
        10: [1, 3, 7, 9],
        25: [1, 3, 7, 9, 13, 24],
        50: [1, 3, 7, 9, 13, 24, 35],
        100: [1, 3, 7, 9, 13, 24, 35, 49],
    }
    if output_n not in eval_frames_by_horizon:
        # Previously this surfaced later as an UnboundLocalError.
        raise ValueError(
            'unsupported output_n for evaluation: {}'.format(output_n))
    eval_frame = eval_frames_by_horizon[output_n]

    t_e = np.zeros(len(eval_frame))
    t_3d = np.zeros(len(eval_frame))

    model.eval()
    # calculate no of iterations in auto regression to perform
    iterations = int(output_n / stepsize)
    print('iterations: {}'.format(iterations))

    # The DCT / inverse-DCT matrices depend only on dct_n, so build them once
    # instead of twice per batch (assumes get_dct_matrix is a pure function
    # returning the pair (dct, idct) -- TODO confirm).
    dct_np, idct_np = data_utils.get_dct_matrix(dct_n)
    dct_m_in = torch.from_numpy(dct_np).float()
    idct_m = torch.from_numpy(idct_np).float()

    dim_used = np.array(dim_used)
    dim_used_len = len(dim_used)

    N = 0
    st = time.time()
    bar = Bar('>>>', fill='>', max=len(train_loader))
    for i, (inputs, targets, all_seq) in enumerate(train_loader):
        bt = time.time()

        all_seq = all_seq.float()
        if is_cuda:
            all_seq = all_seq.cuda()
        # Follow the data's device rather than forcing CUDA; this is a no-op
        # once the matrix already lives there.
        dct_m_in = dct_m_in.to(all_seq.device)
        n, _, dim_full_len = all_seq.data.shape

        y_hat = None
        # Auto regression
        for idx in range(iterations):
            # frames [start, stop) of the ground truth are appended to the
            # previous prediction to form the next input window
            start = input_n + idx * stepsize
            stop = start + stepsize
            if y_hat is None:
                # first iteration: the first dct_n frames of the sequence
                input_seq = all_seq[:, :dct_n, dim_used]
            else:
                # stack output from prev iteration and next frames to form
                # the next input seq
                input_seq = torch.cat(
                    (y_hat, all_seq[:, start:stop, dim_used]), 1)
            # calculate DCT of the input seq
            input_dct_seq = torch.matmul(dct_m_in, input_seq).transpose(1, 2)
            if is_cuda:
                input_dct_seq = input_dct_seq.cuda()
            y = model(input_dct_seq)
            idct_m = idct_m.to(y.device)
            y_t = y.view(-1, dct_n).transpose(0, 1)
            y_exp = torch.matmul(idct_m,
                                 y_t).transpose(0, 1).contiguous().view(
                                     -1, dim_used_len, dct_n).transpose(1, 2)
            # drop the first `stepsize` frames; the rest seeds the next window
            y_hat = y_exp[:, stepsize:, :]
            # accumulate the output frames in a single tensor
            if idx == 0:
                outputs = y_exp
            else:
                outputs = torch.cat((outputs, y_exp[:, input_n:, :]), 1)

        pred_expmap = all_seq.clone()
        pred_expmap[:, :, dim_used] = outputs
        pred_expmap = pred_expmap[:, input_n:, :].contiguous().view(
            -1, dim_full_len)
        targ_expmap = all_seq[:, input_n:, :].clone().contiguous().view(
            -1, dim_full_len)

        pred_expmap[:, 0:6] = 0
        targ_expmap[:, 0:6] = 0
        pred_expmap = pred_expmap.view(-1, 3)
        targ_expmap = targ_expmap.view(-1, 3)

        # get euler angles from expmap
        pred_eul = data_utils.rotmat2euler_torch(
            data_utils.expmap2rotmat_torch(pred_expmap))
        pred_eul = pred_eul.view(-1,
                                 dim_full_len).view(-1, output_n, dim_full_len)
        targ_eul = data_utils.rotmat2euler_torch(
            data_utils.expmap2rotmat_torch(targ_expmap))
        targ_eul = targ_eul.view(-1,
                                 dim_full_len).view(-1, output_n, dim_full_len)

        # get 3d coordinates
        targ_p3d = data_utils.expmap2xyz_torch(
            targ_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)
        pred_p3d = data_utils.expmap2xyz_torch(
            pred_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)

        # update testing errors; batch means are weighted by the batch size
        # so the final division by N yields a per-sample average
        for k, j in enumerate(eval_frame):
            t_e[k] += torch.mean(
                torch.norm(pred_eul[:, j, :] - targ_eul[:, j, :], 2,
                           1)).item() * n
            t_3d[k] += torch.mean(
                torch.norm(
                    targ_p3d[:, j, :, :].contiguous().view(-1, 3) -
                    pred_p3d[:, j, :, :].contiguous().view(-1, 3), 2,
                    1)).item() * n
        N += n

        bar.suffix = '{}/{}|batch time {:.4f}s|total time{:.2f}s'.format(
            i + 1, len(train_loader),
            time.time() - bt,
            time.time() - st)
        bar.next()
    bar.finish()
    return t_e / N, t_3d / N
示例#7
0
def test(train_loader,
         model,
         input_n=20,
         output_n=50,
         is_cuda=False,
         dim_used=[],
         dct_n=20):
    """
    Evaluate ``model`` on ``train_loader`` at a fixed set of future frames,
    using the CMU expmap-to-xyz conversion for the 3D error.

    :param train_loader: iterable yielding ``(inputs, targets, all_seq)``
    :param model: network whose output is mapped back with an inverse DCT
    :param input_n: number of leading frames excluded from the evaluation
    :param output_n: number of predicted frames; ``>= 25`` and ``10`` have an
        evaluation-frame table
    :param is_cuda: move the batches to CUDA before evaluation
    :param dim_used: indices along the last axis of ``all_seq`` that the
        prediction overwrites (the default list is never mutated)
    :param dct_n: number of inverse-DCT columns used
    :return: tuple ``(t_e, t_3d)`` of numpy arrays with the Euler-angle and
        3D errors per evaluated frame, averaged over all samples
    :raises ValueError: if ``output_n`` has no evaluation-frame table
    """
    # `>= 25` (instead of `== 25`) makes the function usable with its own
    # default output_n=50, which previously hit an unbound eval_frame.
    if output_n >= 25:
        eval_frame = [1, 3, 7, 9, 13, 24]
    elif output_n == 10:
        eval_frame = [1, 3, 7, 9]
    else:
        raise ValueError(
            'unsupported output_n for evaluation: {}'.format(output_n))
    t_e = np.zeros(len(eval_frame))
    t_3d = np.zeros(len(eval_frame))

    dim_used = np.array(dim_used)
    dim_used_len = len(dim_used)

    N = 0
    model.eval()
    st = time.time()
    bar = Bar('>>>', fill='>', max=len(train_loader))
    for i, (inputs, targets, all_seq) in enumerate(train_loader):
        bt = time.time()

        if is_cuda:
            inputs = inputs.cuda().float()
            # `async` is a reserved word since Python 3.7; the keyword was
            # renamed to `non_blocking` in PyTorch.
            all_seq = all_seq.cuda(non_blocking=True).float()

        outputs = model(inputs)

        n, seq_len, dim_full_len = all_seq.data.shape
        all_seq[:, :, 0:6] = 0

        _, idct_m = data_utils.get_dct_matrix(seq_len)
        # Follow the device of the model output instead of forcing CUDA.
        idct_m = torch.from_numpy(idct_m).float().to(outputs.device)
        # NOTE(review): the coefficients are reshaped to `seq_len` columns but
        # multiplied with only `dct_n` inverse-DCT columns, so this works
        # only when dct_n == seq_len. Left unchanged -- confirm whether
        # `view(-1, dct_n)` was intended.
        outputs_t = outputs.view(-1, seq_len).transpose(0, 1)
        outputs_exp = torch.matmul(idct_m[:, :dct_n], outputs_t)
        outputs_exp = outputs_exp.transpose(0, 1).contiguous().view(
            -1, dim_used_len, seq_len).transpose(1, 2)

        pred_expmap = all_seq.clone()
        pred_expmap[:, :, dim_used] = outputs_exp

        pred_expmap = pred_expmap[:, input_n:, :].contiguous().view(
            -1, dim_full_len)
        targ_expmap = all_seq[:, input_n:, :].clone().contiguous().view(
            -1, dim_full_len)

        pred_expmap = pred_expmap.view(-1, 3)
        targ_expmap = targ_expmap.view(-1, 3)

        pred_eul = data_utils.rotmat2euler_torch(
            data_utils.expmap2rotmat_torch(pred_expmap))
        pred_eul = pred_eul.view(-1, dim_full_len).view(
            -1, output_n, dim_full_len)
        targ_eul = data_utils.rotmat2euler_torch(
            data_utils.expmap2rotmat_torch(targ_expmap))
        targ_eul = targ_eul.view(-1, dim_full_len).view(
            -1, output_n, dim_full_len)

        targ_p3d = data_utils.expmap2xyz_torch_cmu(
            targ_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)
        pred_p3d = data_utils.expmap2xyz_torch_cmu(
            pred_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)

        for k, j in enumerate(eval_frame):
            # .item() replaces `.numpy()[0]`, which raises IndexError on the
            # 0-dim result of torch.mean. Batch means are weighted by the
            # batch size so the final division by N is a per-sample average.
            t_e[k] += torch.mean(
                torch.norm(pred_eul[:, j, :] - targ_eul[:, j, :], 2,
                           1)).item() * n
            t_3d[k] += torch.mean(
                torch.norm(
                    targ_p3d[:, j, :, :].contiguous().view(-1, 3) -
                    pred_p3d[:, j, :, :].contiguous().view(-1, 3), 2,
                    1)).item() * n

        N += n

        bar.suffix = '{}/{}|batch time {:.4f}s|total time{:.2f}s'.format(
            i + 1, len(train_loader),
            time.time() - bt,
            time.time() - st)
        bar.next()
    bar.finish()
    return t_e / N, t_3d / N
示例#8
0
def test(train_loader,
         model,
         input_n=20,
         output_n=50,
         is_cuda=False,
         dim_used=[]):
    """
    Evaluate a model that predicts Chebyshev coefficients, at a fixed set of
    future frames.

    The coefficients are expanded to per-frame values with a Chebyshev
    Vandermonde matrix of size ``input_n + output_n``, written into a copy of
    the ground truth at the ``dim_used`` columns, and compared with the
    ground truth as Euler angles and as 3D positions.

    :param train_loader: iterable yielding ``(inputs, targets, all_seq)``
    :param model: network returning Chebyshev coefficients
    :param input_n: number of leading frames excluded from the evaluation
    :param output_n: number of predicted frames; selects the evaluated frames
    :param is_cuda: move the batches to CUDA before evaluation
    :param dim_used: indices along the last axis of ``all_seq`` that the
        prediction overwrites (the default list is never mutated)
    :return: tuple ``(t_e, t_3d)`` of numpy arrays with the Euler-angle and
        3D errors per evaluated frame, averaged over all samples
    """
    if output_n >= 25:
        eval_frame = [1, 3, 7, 9, 13, 24]
    elif output_n == 10:
        eval_frame = [1, 3, 7, 9]
    else:
        eval_frame = [1, 2, 3, 4]

    t_e = np.zeros(len(eval_frame))
    t_3d = np.zeros(len(eval_frame))

    # The Chebyshev basis depends only on the sequence length, so build it
    # once. It gets its own name: the original reused `N` for both this
    # length and the running sample count, which corrupted the averages
    # returned at the end.
    cheb_n = input_n + output_n
    t = np.arange(1, cheb_n + 1, 1)
    A = torch.from_numpy(chebyshev.chebvander(t, cheb_n - 1)).float()

    dim_used = np.array(dim_used)
    dim_used_len = len(dim_used)

    n_samples = 0
    model.eval()
    st = time.time()
    bar = Bar('>>>', fill='>', max=len(train_loader))
    for i, (inputs, targets, all_seq) in enumerate(train_loader):
        bt = time.time()

        if is_cuda:
            inputs = inputs.cuda().float()
            all_seq = all_seq.cuda().float()

        outputs = model(inputs)

        # inverse Chebyshev transformation; the basis follows the device of
        # the model output instead of being forced onto CUDA (no-op once it
        # already lives there)
        n, seq_len, dim_full_len = all_seq.data.shape
        A = A.to(outputs.device)
        outputs_t = torch.matmul(A, outputs.view(-1, cheb_n).transpose(0, 1))
        outputs_expmap = outputs_t.transpose(0,
                                             1).view(-1, dim_used_len,
                                                     seq_len).transpose(1, 2)

        pred_expmap = all_seq.clone()
        pred_expmap[:, :, dim_used] = outputs_expmap
        pred_expmap = pred_expmap[:, input_n:, :].contiguous().view(
            -1, dim_full_len)
        targ_expmap = all_seq[:, input_n:, :].clone().contiguous().view(
            -1, dim_full_len)

        pred_expmap[:, 0:6] = 0
        targ_expmap[:, 0:6] = 0
        pred_expmap = pred_expmap.view(-1, 3)
        targ_expmap = targ_expmap.view(-1, 3)

        # get euler angles from expmap
        pred_eul = data_utils.rotmat2euler_torch(
            data_utils.expmap2rotmat_torch(pred_expmap))
        pred_eul = pred_eul.view(-1,
                                 dim_full_len).view(-1, output_n, dim_full_len)
        targ_eul = data_utils.rotmat2euler_torch(
            data_utils.expmap2rotmat_torch(targ_expmap))
        targ_eul = targ_eul.view(-1,
                                 dim_full_len).view(-1, output_n, dim_full_len)

        # get 3d coordinates
        targ_p3d = data_utils.expmap2xyz_torch(
            targ_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)
        pred_p3d = data_utils.expmap2xyz_torch(
            pred_expmap.view(-1, dim_full_len)).view(n, output_n, -1, 3)

        # update testing errors; batch means are weighted by the batch size
        # so the final division by n_samples is a per-sample average
        for k, j in enumerate(eval_frame):
            t_e[k] += torch.mean(
                torch.norm(pred_eul[:, j, :] - targ_eul[:, j, :], 2,
                           1)).item() * n
            t_3d[k] += torch.mean(
                torch.norm(
                    targ_p3d[:, j, :, :].contiguous().view(-1, 3) -
                    pred_p3d[:, j, :, :].contiguous().view(-1, 3), 2,
                    1)).item() * n
        n_samples += n

        bar.suffix = '{}/{}|batch time {:.4f}s|total time{:.2f}s'.format(
            i + 1, len(train_loader),
            time.time() - bt,
            time.time() - st)
        bar.next()
    bar.finish()
    return t_e / n_samples, t_3d / n_samples