Example #1
def factor_solve_kkt(Q, D, G, A, rx, rs, rz, ry):
    nineq, nz, neq, _ = get_sizes(G, A)

    if neq > 0:
        H_ = torch.cat([
            torch.cat([Q, torch.zeros(nz, nineq).type_as(Q)], 1),
            torch.cat([torch.zeros(nineq, nz).type_as(Q), D], 1)
        ], 0)
        A_ = torch.cat([
            torch.cat([G, torch.eye(nineq).type_as(Q)], 1),
            torch.cat([A, torch.zeros(neq, nineq).type_as(Q)], 1)
        ], 0)
        g_ = torch.cat([rx, rs], 0)
        h_ = torch.cat([rz, ry], 0)
    else:
        H_ = torch.cat([
            torch.cat([Q, torch.zeros(nz, nineq).type_as(Q)], 1),
            torch.cat([torch.zeros(nineq, nz).type_as(Q), D], 1)
        ], 0)
        A_ = torch.cat([G, torch.eye(nineq).type_as(Q)], 1)
        g_ = torch.cat([rx, rs], 0)
        h_ = rz

    U_H_ = torch.potrf(H_)

    invH_A_ = torch.potrs(A_.t(), U_H_)
    invH_g_ = torch.potrs(g_.view(-1, 1), U_H_).view(-1)

    S_ = torch.mm(A_, invH_A_)
    U_S_ = torch.potrf(S_)
    t_ = torch.mv(A_, invH_g_).view(-1, 1) - h_
    w_ = -torch.potrs(t_, U_S_).view(-1)
    v_ = torch.potrs(-g_.view(-1, 1) - torch.mv(A_.t(), w_), U_H_).view(-1)

    return v_[:nz], v_[nz:], w_[:nineq], w_[nineq:] if neq > 0 else None
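Note: torch.potrf and torch.potrs used throughout these examples belong to the pre-1.0 PyTorch API and are no longer available on recent releases. A minimal sketch of the modern equivalents, assuming a PyTorch version where torch.linalg and torch.cholesky_solve exist (the tensors A and B below are made up for illustration):

import torch

A = torch.randn(5, 5, dtype=torch.float64)
A = A @ A.t() + 5 * torch.eye(5, dtype=torch.float64)  # make A positive definite
B = torch.randn(5, 3, dtype=torch.float64)

L = torch.linalg.cholesky(A)      # lower factor; replaces torch.potrf(A, upper=False)
X = torch.cholesky_solve(B, L)    # replaces torch.potrs(B, L, upper=False)
assert torch.allclose(A @ X, B)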
Example #2
def pre_factor_kkt(Q, G, A):
    """ Perform all one-time factorizations and cache relevant matrix products"""
    nineq, nz, neq, _ = get_sizes(G, A)

    # S = [ A Q^{-1} A^T        A Q^{-1} G^T           ]
    #     [ G Q^{-1} A^T        G Q^{-1} G^T + D^{-1} ]

    U_Q = torch.potrf(Q)
    # partial cholesky of S matrix
    U_S = torch.zeros(neq + nineq, neq + nineq).type_as(Q)

    G_invQ_GT = torch.mm(G, torch.potrs(G.t(), U_Q))
    R = G_invQ_GT
    if neq > 0:
        invQ_AT = torch.potrs(A.t(), U_Q)
        A_invQ_AT = torch.mm(A, invQ_AT)
        G_invQ_AT = torch.mm(G, invQ_AT)

        # TODO: torch.potrf sometimes reports that the matrix is not positive
        # definite even though numpy's Cholesky succeeds; issue filed at
        # https://github.com/pytorch/pytorch/issues/199
        try:
            U11 = torch.potrf(A_invQ_AT)
        except:
            U11 = torch.Tensor(np.linalg.cholesky(
                A_invQ_AT.cpu().numpy())).type_as(A_invQ_AT)

        # TODO: torch.trtrs is currently not implemented on the GPU
        # and we are using gesv as a workaround.
        U12 = torch.gesv(G_invQ_AT.t(), U11.t())[0]
        U_S[:neq, :neq] = U11
        U_S[:neq, neq:] = U12
        R -= torch.mm(U12.t(), U12)

    return U_Q, U_S, R
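The partial factorization above fills only the U11 and U12 blocks of the upper Cholesky factor of S and returns R = G Q^{-1} G^T - U12' U12, i.e. the Schur complement of the A Q^{-1} A^T block, to be completed once D^{-1} is known. A small self-contained check of that identity; this is a sketch with made-up sizes, using the modern torch.linalg API rather than potrf/gesv:

import torch

neq, nineq = 3, 4
M = torch.randn(neq + nineq, neq + nineq, dtype=torch.float64)
S = M @ M.t() + torch.eye(neq + nineq, dtype=torch.float64)   # symmetric positive definite
S11, S12, S22 = S[:neq, :neq], S[:neq, neq:], S[neq:, neq:]

U11 = torch.linalg.cholesky(S11).t()        # upper Cholesky factor of S11
U12 = torch.linalg.solve(U11.t(), S12)      # U11^{-T} S12
schur = S22 - S12.t() @ torch.linalg.solve(S11, S12)
assert torch.allclose(S22 - U12.t() @ U12, schur)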
Example #3
def solve_kkt(U_Q, d, G, A, U_S, rx, rs, rz, ry, dbg=False):
    """ Solve KKT equations for the affine step"""
    nineq, nz, neq, _ = get_sizes(G, A)

    invQ_rx = torch.potrs(rx.view(-1, 1), U_Q).view(-1)
    if neq > 0:
        h = torch.cat(
            [torch.mv(A, invQ_rx) - ry,
             torch.mv(G, invQ_rx) + rs / d - rz], 0)
    else:
        h = torch.mv(G, invQ_rx) + rs / d - rz

    w = -torch.potrs(h.view(-1, 1), U_S).view(-1)

    g1 = -rx - torch.mv(G.t(), w[neq:])
    if neq > 0:
        g1 -= torch.mv(A.t(), w[:neq])
    g2 = -rs - w[neq:]

    dx = torch.potrs(g1.view(-1, 1), U_Q).view(-1)
    ds = g2 / d
    dz = w[neq:]
    dy = w[:neq] if neq > 0 else None

    # if np.all(np.array([x.norm() for x in [rx, rs, rz, ry]]) != 0):
    if dbg:
        import IPython
        import sys
        IPython.embed()
        sys.exit(-1)

    # if rs.norm() > 0: import IPython, sys; IPython.embed(); sys.exit(-1)
    return dx, ds, dz, dy
Example #4
    def forward(self, x_train, y_train, x_test=None):
        # See the autograd section for explanation of what happens here.
        n = x_train.size(0)
        p = x_train.size(-1)
        d = torch.zeros(n, n)
        for i in range(p):
            d += 0.5 * (x_train[:, i].unsqueeze(1) - x_train[:, i].unsqueeze(0)
                        ).pow(2) / self.lengthscale[i].pow(2)

        kyy = self.sigma_f.pow(2) * torch.exp(-d) + self.sigma_n.pow(
            2) * torch.eye(n)
        c = torch.cholesky(kyy, upper=True)
        # v = torch.potrs(y_train, c, upper=True)
        v, _ = torch.gesv(y_train.unsqueeze(1), kyy)
        # v = torch.cholesky_solve(y_train.unsqueeze(1), c, upper=True)
        if x_test is None:
            out = (c, v)

        if x_test is not None:
            with torch.no_grad():
                ntest = x_test.size(0)
                d = torch.zeros(ntest, n)
                for i in range(p):
                    d += 0.5 * (x_test[:, i].unsqueeze(1) -
                                x_train[:, i].unsqueeze(0)
                                ).pow(2) / self.lengthscale[i].pow(2)
                kfy = self.sigma_f.pow(2) * torch.exp(-d)
                # solve
                f_test = kfy.mm(v)
                tmp = torch.potrs(kfy.t(), c, upper=True)
                # tmp = torch.cholesky_solve(kfy.t(), c, upper=True)
                tmp = torch.sum(kfy * tmp.t(), dim=1)
                cov_f = self.sigma_f.pow(2) - tmp
            out = (f_test, cov_f)
        return out
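For reference, the test-time branch above evaluates the standard GP regression predictive equations (a restatement of what the code computes, not new code):

    f_test = K_fy K_yy^{-1} y_train
    cov_f  = k_** - diag(K_fy K_yy^{-1} K_yf),   with k_** = sigma_f^2 for the RBF kernel

where v caches K_yy^{-1} y_train and tmp holds K_yy^{-1} K_yf before being reduced to the diagonal term.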
Example #5
    def _output_output_covariance(self, input, Beta_a, lengthscale_a, variance_a, mu_a, Kff_inv_a,
                                        Beta_b, lengthscale_b, variance_b, mu_b, Kff_inv_b,
                                        mean, covariance):
        """

        :param input:  traing inputs H x (n+m)
        :param Beta_a:  cached Beta for output dim a,  1 x H
        :param lengthscale_a: legnth scale of the RBF kernel for output dim a, 1 x (n + m)
        :param Kff_inv_a: for output dim a , H x H
        :param variance_a:  variance of the kernel for output dim a
        :param mu_a: prediction for the mean of GP under uncertain inputs for output dim a
        :param Beta_b: cached Beta for output dim b,  H x (n+m)
        :param lengthscale_b: legnth scale of the RBF kernel for output dim b, 1 x H
        :param Kff_inv_b: for output dim b , H x H
        :param variance_b: variance of the kernel for output dim b
        :param mu_b: prediction for the mean of GP under uncertain inputs for output dim b
        :param mean: mean for the uncertain inputs 1 x (n + m)
        :param covariance: covariance for the uncertain inputs (n + m) x (n + m)
        :return:
        """
        assert (input.size()[1] == mean.size()[1])

        mean.requires_grad = True
        covariance.requires_grad = True

        # eq 12 of ref.[1]
        #with torch.no_grad():

        mat1 = 1 / ((1 / lengthscale_a).diag() + (1 / lengthscale_b).diag())
        R = mat1 + covariance
        det = (torch.det(R) ** -0.5) * (torch.det(mat1) ** 0.5)

        # H x 1 x (n+m) -/+ H x (n+m) = H x H x (n+m)
        diff_m = (input.unsqueeze(1) - input) / 2.
        sum_m = (input.unsqueeze(1) * lengthscale_a + input * lengthscale_b) / (lengthscale_a + lengthscale_b)

        mat2 = R.potrf(upper=False)
        mat3 = torch.potrs(torch.eye(mat1.size()[0]), mat2, upper=False)

        # elementwise computation
        # H x H
        mat4 = ((diff_m ** 2 / (lengthscale_a + lengthscale_b)).sum(dim=-1)) * -0.5
        mat5 = sum_m - mean

        # H x H x 1 x (n+m) * (n+m) x (n+m) @ H x H x (n+m) x 1 = H x H x 1 x 1 TODO MAYBE CONSIDER ADD SOME JITTER ?
        mat6 = (torch.matmul(mat5.unsqueeze(2), torch.matmul(mat3, mat5.unsqueeze(-1)))) * -0.5
        # H by H
        L = variance_a * variance_b * det * torch.mul(torch.exp(mat4), torch.exp(mat6.view(input.size()[0], input.size()[0])))
        cov = torch.matmul(Beta_a, torch.matmul(L, Beta_b)) - mu_a * mu_b

        # the diagonal term
        if ((Beta_a == Beta_b).all() and (lengthscale_a == lengthscale_b).all()
                                     and (variance_a == variance_b).all() and (mu_a == mu_b).all()
                                     and (Kff_inv_a == Kff_inv_b).all()):

            cov = cov + variance_a - torch.trace(torch.matmul(Kff_inv_a, L))

        #TODO Compute the gradient
        cov.backward()
        return cov, mean.grad.data, covariance.grad.data
Example #6
    def train_locator_model(self, model_XTX, model_XTY, model=None):
        if model is None:
            # Cold start: solve the normal equations (X'X) model = X'Y directly
            # via a Cholesky factorization of X'X.
            model = torch.potrs(model_XTY, torch.potrf(model_XTX))
        else:
            # Warm start: refine the previous solution with Gauss-Seidel sweeps,
            # solving (D + L) model_new = X'Y - U model_old by forward substitution,
            # where L/U are the strictly lower/upper parts of X'X and D its diagonal.
            for _ in range(30):
                model, _ = torch.trtrs(model_XTY - torch.mm(torch.triu(model_XTX, diagonal=1), model), torch.tril(model_XTX, diagonal=0), upper=False)
        return model
Example #7
    def test_cg_with_tridiag(self):
        size = 10
        matrix = torch.DoubleTensor(size, size).normal_()
        matrix = matrix.matmul(matrix.transpose(-1, -2))
        matrix.div_(matrix.norm())
        matrix.add_(torch.DoubleTensor(matrix.size(-1)).fill_(1e-1).diag())

        rhs = torch.DoubleTensor(size, 50).normal_()
        solves, t_mats = linear_cg(
            matrix.matmul,
            rhs=rhs,
            n_tridiag=5,
            max_iter=size,
            tolerance=0,
        )

        # Check cg
        matrix_chol = matrix.potrf()
        actual = torch.potrs(rhs, matrix_chol)
        self.assertTrue(approx_equal(solves, actual))

        # Check tridiag
        eigs = matrix.symeig()[0]
        for i in range(5):
            approx_eigs = t_mats[i].symeig()[0]
            self.assertTrue(approx_equal(eigs, approx_eigs))
Example #8
def gauss_kl_diag(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a matrix, each column represents the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = torch.potrf(K, upper=False)
    alpha, _ = torch.gesv(q_mu, L)
    KL = 0.5 * (alpha**2).sum()  # Mahalanobis term.
    num_latent = q_sqrt.size(1)
    KL += num_latent * torch.diag(L).log().sum()  # Prior log-det term.
    KL += -0.5 * q_sqrt.numel()  # constant term
    KL += -q_sqrt.log().sum()  # Log-det of q-cov
    K_inv, _ = torch.potrs(Variable(torch.eye(L.size(0), out=L.data.new())),
                           L,
                           upper=False)
    KL += 0.5 * (torch.diag(K_inv).unsqueeze(1) *
                 q_sqrt**2).sum()  # Trace term.
    return KL
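A hedged sanity check of the formula above for a single latent column, with made-up sizes, using torch.distributions (on a recent PyTorch, where the Variable wrapper is a no-op):

import torch
from torch.distributions import MultivariateNormal, kl_divergence

n = 4
K = torch.randn(n, n, dtype=torch.float64)
K = K @ K.t() + torch.eye(n, dtype=torch.float64)        # covariance of p
q_mu = torch.randn(n, 1, dtype=torch.float64)            # one column = one latent
q_sqrt = torch.rand(n, 1, dtype=torch.float64) + 0.5     # diagonal square roots

q = MultivariateNormal(q_mu[:, 0], covariance_matrix=torch.diag(q_sqrt[:, 0] ** 2))
p = MultivariateNormal(torch.zeros(n, dtype=torch.float64), covariance_matrix=K)
reference = kl_divergence(q, p)   # should agree with gauss_kl_diag(q_mu, q_sqrt, K)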
Example #9
    def test_batch_cg_with_tridiag(self):
        batch = 5
        size = 10
        matrix = torch.DoubleTensor(batch, size, size).normal_()
        matrix = matrix.matmul(matrix.transpose(-1, -2))
        matrix.div_(matrix.norm())
        matrix.add_(torch.DoubleTensor(matrix.size(-1)).fill_(1e-1).diag())

        rhs = torch.DoubleTensor(batch, size, 50).normal_()
        solves, t_mats = linear_cg(
            matrix.matmul,
            rhs=rhs,
            n_tridiag=8,
            max_iter=size,
            tolerance=0,
        )

        # Check cg
        matrix_chol = torch.cat(
            [matrix[i].potrf().unsqueeze(0) for i in range(5)])
        actual = torch.cat([
            torch.potrs(rhs[i], matrix_chol[i]).unsqueeze(0) for i in range(5)
        ])
        self.assertTrue(approx_equal(solves, actual))

        # Check tridiag
        for i in range(5):
            eigs = matrix[i].symeig()[0]
            for j in range(8):
                approx_eigs = t_mats[j, i].symeig()[0]
                self.assertLess(
                    torch.mean(torch.abs((eigs - approx_eigs) / eigs)),
                    0.05,
                )
Example #10
    def test_potrs(self):
        chol = torch.tensor([[1, 0, 0, 0], [2, 1, 0, 0], [0, 1, 2, 0], [0, 0, 2, 3]], dtype=torch.float).unsqueeze(0)

        mat = torch.randn(1, 4, 3)
        self.assertTrue(
            approx_equal(torch.potrs(mat[0], chol[0], upper=False), tridiag_batch_potrs(mat, chol, upper=False)[0])
        )
Example #11
def batch_potrs(mat, chol):
    """
    """
    potrs_list = []
    for i in range(mat.size(0)):
        potrs_list.append(torch.potrs(mat[i], chol[i]).unsqueeze(0))
    return torch.cat(potrs_list, 0)
Example #12
    def filtering(self, observation, mu_s_curr, sigma_s_curr, index=None):
        """
        filtering from p(x(k) | y(1:k-1)), updated using p(y(k) | x(k)), to get p(x(k) | y(1:k))
        :param observation: the current measurement y(k)
        :param mu_s_curr: mean of p(x(k) | y(1:k-1))
        :param sigma_s_curr: covariance of p(x(k) | y(1:k-1))
        :return: the filtered mean and covariance of p(x(k) | y(1:k))
        """

        # first compute the prediction of the measurement based on the observation model
        if self.option == "GP":
            #print(self.lengthscale_o, sigma_s_curr)
            mu_o_curr, sigma_o_curr = self._prediction(self.X_o, self.Beta_o, self.lengthscale_o, self.K_o_var, self.Kff_o_inv, self.noise_o, mu_s_curr, sigma_s_curr, flag='filtering')

            Cov_yx, Cov_xy = self._compute_cov(self.X_o, mu_s_curr, self.mu_o_curr,
                                               self.lengthscale_o, sigma_s_curr, self.K_o_var, self.Beta_o)
        else:
            assert (index != 0 and index <= len(self.Xu_o)), "state transition models have dimension {}, index is {}.".format(len(self.Xu_o), index)
            mu_o_curr, sigma_o_curr = self._prediction(self.Xu_o[index], self.zip_cached_o, mu_s_curr, sigma_s_curr)
            Cov_yx, Cov_xy = self._compute_cov(self.Xu_o[index], mu_s_curr, self.mu_o_curr,
                                              self.lengthscale_o, sigma_s_curr, self.K_o_var, self.Beta_o)
            
        self.mu_o_curr, self.sigma_o_curr = mu_o_curr, sigma_o_curr
        #print(observation, self.mu_o_curr, self.sigma_o_curr)
        sigma_o_curr_inv = torch.potrs(torch.eye(sigma_o_curr.size()[0]), sigma_o_curr.potrf(upper=False), upper=False)
        mu_hat_s_curr = mu_s_curr + torch.matmul(Cov_xy, torch.matmul(sigma_o_curr_inv, (observation - mu_o_curr)))
        sigma_hat_s_curr = sigma_s_curr - torch.matmul(Cov_xy, torch.matmul(sigma_o_curr_inv, Cov_yx))

        self.mu_hat_s_curr, self.sigma_hat_s_curr = mu_hat_s_curr, sigma_hat_s_curr
        self.mu_hat_s_curr_lis.append(self.mu_hat_s_curr.clone())
        self.sigma_hat_s_curr_lis.append(self.sigma_hat_s_curr.clone())


        return mu_hat_s_curr, sigma_hat_s_curr
Example #13
    def test_cg_with_tridiag(self):
        size = 10
        matrix = torch.randn(size, size, dtype=torch.float64)
        matrix = matrix.matmul(matrix.transpose(-1, -2))
        matrix.div_(matrix.norm())
        matrix.add_(torch.eye(matrix.size(-1), dtype=torch.float64).mul_(1e-1))

        rhs = torch.randn(size, 50, dtype=torch.float64)
        solves, t_mats = linear_cg(matrix.matmul,
                                   rhs=rhs,
                                   n_tridiag=5,
                                   max_tridiag_iter=10,
                                   max_iter=size,
                                   tolerance=0,
                                   eps=1e-15)

        # Check cg
        matrix_chol = matrix.cholesky(upper=True)
        actual = torch.potrs(rhs, matrix_chol)
        self.assertTrue(approx_equal(solves, actual))

        # Check tridiag
        eigs = matrix.symeig()[0]
        for i in range(5):
            approx_eigs = t_mats[i].symeig()[0]
            self.assertTrue(approx_equal(eigs, approx_eigs))
Example #14
    def test_batch_cg_with_tridiag(self):
        batch = 5
        size = 10
        matrix = torch.randn(batch, size, size, dtype=torch.float64)
        matrix = matrix.matmul(matrix.transpose(-1, -2))
        matrix.div_(matrix.norm())
        matrix.add_(torch.eye(matrix.size(-1), dtype=torch.float64).mul_(1e-1))

        rhs = torch.randn(batch, size, 50, dtype=torch.float64)
        solves, t_mats = linear_cg(matrix.matmul,
                                   rhs=rhs,
                                   n_tridiag=8,
                                   max_iter=size,
                                   max_tridiag_iter=10,
                                   tolerance=0,
                                   eps=1e-20)

        # Check cg
        matrix_chol = torch.cholesky(matrix, upper=True)
        actual = torch.potrs(rhs, matrix_chol)
        self.assertTrue(approx_equal(solves, actual))

        # Check tridiag
        for i in range(5):
            eigs = matrix[i].symeig()[0]
            for j in range(8):
                approx_eigs = t_mats[j, i].symeig()[0]
                self.assertLess(
                    torch.mean(torch.abs((eigs - approx_eigs) / eigs)), 0.05)
Example #15
def linear_solve_compat(matrix, matrix_chol, y):
    """Solves the equation ``torch.mm(matrix, x) = y`` for x."""
    if matrix.requires_grad or y.requires_grad:
        # If derivatives are required, use the more expensive gesv.
        return torch.gesv(y, matrix)[0]
    else:
        # Use the cheaper Cholesky solver.
        return torch.potrs(y, matrix_chol)
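A minimal usage sketch with made-up tensors, staying with the same pre-1.0 API as the helper above (on newer releases torch.cholesky_solve is itself differentiable, so the split may no longer be necessary):

matrix = torch.randn(6, 6)
matrix = matrix.mm(matrix.t()) + 6 * torch.eye(6)   # positive definite
matrix_chol = matrix.potrf()                        # upper factor, matching potrs' default
y = torch.randn(6, 2)
x = linear_solve_compat(matrix, matrix_chol, y)     # solves matrix @ x = y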
Example #16
def test_potrs():
    chol = torch.Tensor([
        [1, 0, 0, 0],
        [2, 1, 0, 0],
        [0, 1, 2, 0],
        [0, 0, 2, 3],
    ]).unsqueeze(0)

    mat = torch.randn(1, 4, 3)
    assert approx_equal(torch.potrs(mat[0], chol[0], upper=False), tridiag_batch_potrs(mat, chol, upper=False)[0])
Example #17
def batch_potrs(mat, chol):
    """
    TODO: Replace with torch batch potrs once it is implemented.
    """
    potrs_list = [
        torch.potrs(sub_mat, sub_chol) for sub_mat, sub_chol in zip(
            mat.view(-1, *mat.shape[-2:]), chol.view(-1, *chol.shape[-2:]))
    ]
    res = torch.cat(potrs_list, 0)
    return res.view_as(mat)
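The TODO above is addressed by later PyTorch versions, where torch.cholesky_solve accepts batched inputs directly; a hypothetical one-line replacement (assuming the factors in chol are upper triangular, matching torch.potrs' default) might look like:

def batch_potrs_modern(mat, chol):
    # torch.cholesky_solve broadcasts over leading batch dimensions.
    return torch.cholesky_solve(mat, chol, upper=True)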
Example #18
def woodbury_factor(low_rank_mat, shift):
    """
    Given a low rank (k x n) matrix V and a shift, returns the
    matrix R so that
        R = (I_k + 1/shift VV')^{-1}V
    to be used in solves with (V'V + shift I) via the Woodbury formula
    """
    k = low_rank_mat.size(-2)
    shifted_mat = (1 / shift) * low_rank_mat.matmul(
        low_rank_mat.transpose(-1, -2))

    shifted_mat = shifted_mat + shifted_mat.new(k).fill_(1).diag()

    if low_rank_mat.ndimension() == 3:
        R = torch.cat([
            torch.potrs(low_rank_mat[i], shifted_mat[i].potrf()).unsqueeze(0)
            for i in range(shifted_mat.size(0))
        ])
    else:
        R = torch.potrs(low_rank_mat, shifted_mat.potrf())

    return R
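For context, a sketch of how the returned factor R might be consumed (not necessarily the actual downstream solver): by the Woodbury identity, (V'V + shift I)^{-1} b = b / shift - V' (R b) / shift^2.

def woodbury_solve_sketch(low_rank_mat, shift, rhs):
    # Solve (V'V + shift * I) x = rhs using the cached Woodbury factor R.
    R = woodbury_factor(low_rank_mat, shift)
    return rhs / shift - low_rank_mat.transpose(-1, -2).matmul(R.matmul(rhs)) / shift ** 2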
Example #19
    def cache_variable(self):
        #Beta = None
        for (i, GP_dyn) in enumerate(self.GP_dyn):
            if self.option == 'GP':
                noise = GP_dyn.guide()
                Kff = GP_dyn.kernel(self.X_hat).contiguous()
                Kff.view(-1)[::self.X_hat.size()[0] + 1] += GP_dyn.get_param('noise')
                Lff = Kff.potrf(upper=False)
                self.Kff_inv[i, :, :] = torch.potrs(torch.eye(self.X_hat.size()[0]), Lff, upper=False)
                self.Beta[i, :] = torch.potrs(self.dX[:, i], Lff, upper=False).squeeze(-1)
                self.K_var[i] = GP_dyn.kernel.get_param("variance")
                self.lengthscale[i, :] = GP_dyn.kernel.get_param("lengthscale")
                self.noise[i, :] = noise


            else:
                Xu, noise = GP_dyn.guide()
                if (GP_dyn.approx == 'DTC' or GP_dyn.option == 'VFE'):
                    Kff_inv, Beta = self._compute_cached_var_ssgp(GP_dyn, Xu, noise, "DTC")
                else:
                    Kff_inv, Beta = self._compute_cached_var_ssgp(GP_dyn, Xu, noise, "FITC")

                self.Beta[i, :] = Beta
                self.Kff_inv[i, :, :] = Kff_inv
                self.K_var[i] = GP_dyn.kernel.get_param("variance")
                self.lengthscale[i, :] = GP_dyn.kernel.get_param("lengthscale")
                self.Xu[i, :] = Xu
                self.noise[i, :, :] = noise

        print("variable caching for dynamics model {} is done!".format(i))


        print(self.Beta.size())
        print(self.lengthscale.size())
        print(self.Kff_inv.size())
        print(self.K_var.size())

        print("initialization is done!")
Example #20
    def test_cg(self):
        size = 100
        matrix = torch.DoubleTensor(size, size).normal_()
        matrix = matrix.matmul(matrix.transpose(-1, -2))
        matrix.div_(matrix.norm())
        matrix.add_(torch.DoubleTensor(matrix.size(-1)).fill_(1e-1).diag())

        rhs = torch.DoubleTensor(size, 50).normal_()
        solves = linear_cg(matrix.matmul, rhs=rhs, max_iter=size)

        # Check cg
        matrix_chol = matrix.potrf()
        actual = torch.potrs(rhs, matrix_chol)
        self.assertTrue(approx_equal(solves, actual))
Example #21
    def test_cg(self):
        size = 100
        matrix = torch.randn(size, size, dtype=torch.float64)
        matrix = matrix.matmul(matrix.transpose(-1, -2))
        matrix.div_(matrix.norm())
        matrix.add_(torch.eye(matrix.size(-1), dtype=torch.float64).mul_(1e-1))

        rhs = torch.randn(size, 50, dtype=torch.float64)
        solves = linear_cg(matrix.matmul, rhs=rhs, max_iter=size)

        # Check cg
        matrix_chol = matrix.cholesky(upper=True)
        actual = torch.potrs(rhs, matrix_chol)
        self.assertTrue(approx_equal(solves, actual))
Example #22
    def variance_propagation(self, input, Beta, lengthscale, variance, Kff_inv, mu, mean, covariance, noise, flag='prediction'):
        """
        variace of the propagation of GP for uncertain inputs
        :param input: traing inputs N by D or N by E
        :param Beta: cached Beta 1 by N
        :param lengthscale: legnth scale of the RBF kernel  1 by D
        :param Kff_inv: N by N
        :param variance: variance of the kernel
        :param mu: prediction for the mean of GP under uncertain inputs
        :param mean: mean for the uncertain inputs 1 by D or 1 by E
        :param covariance: covariance for the uncertain inputs D by D or E by E
        :return:
        """
        assert (input.size()[1] == mean.size()[1])



        #eq 11 of ref.[1]
        with torch.no_grad():
            mat1 = (lengthscale.diag() / 2. + covariance)
            #mat1 = (covariance / lengthscale * 2 + torch.eye(input.size()[1]))
            det =  (torch.det(mat1) ** -0.5) * (torch.det(lengthscale.diag())  ** 0.5)

            #mat1 = (lengthscale.diag() / 2. + covariance)
            # N by 1 by D (E) -/+ N by D (E) = N by N by D (E)
            diff_m = (input.unsqueeze(1) - input) / 2.
            sum_m = (input.unsqueeze(1) + input) / 2.

            mat2 = mat1.potrf(upper=False)
            mat3 = torch.potrs(torch.eye(mat1.size()[0]), mat2, upper=False)

            # elementwise computation
            # N by N

            mat4 = ((diff_m ** 2 / lengthscale * 2).sum(dim=-1)) * -0.5

            # N x N x 1 x D @ D x D @ N x N x D x 1 = N x N x 1 x 1(or D replaced by E) TODO MAYBE CONSIDER ADD SOME JITTER ?
            mat5 = sum_m - mean

            #print(mat3.size(), mat5.size())
            mat6 = (torch.matmul(mat5.unsqueeze(2), torch.matmul(mat3, mat5.unsqueeze(-1)))) * -0.5
            # N by N
            L = variance**2 * det* torch.mul(torch.exp(mat4), torch.exp(mat6.view(input.size()[0], input.size()[0])))
            #print(torch.trace(torch.matmul(Kff_inv, L)), torch.sum(torch.mul(Kff_inv, L)))
            var = torch.matmul(Beta, torch.matmul(L, Beta)) + variance - torch.trace(torch.matmul(Kff_inv, L)) - mu * mu + 2 * noise
            # if flag != 'prediction':
            #      print(mean, mu, var, torch.matmul(Beta, torch.matmul(L, Beta)), variance - torch.trace(torch.matmul(Kff_inv, L)), mu * mu)
            #      #print(mat4)
            return var
Example #23
	def train_locator_model(self, locator_features, model=None):
		regularization = self.params.regularization
		if self.regularization_matrix is None:
			self.regularization_matrix = regularization*torch.eye(locator_features.shape[1], device=self.params.device)

		train_XTX = torch.mm(locator_features.t(), locator_features)
		train_XTX = train_XTX + self.regularization_matrix
		train_XTY = torch.mm(locator_features.t(), self.labels)

		if model is None:
			model = torch.potrs(train_XTY, torch.potrf(train_XTX))
		else:
			for _ in range(30):
				model, _ = torch.trtrs(train_XTY - torch.mm(torch.triu(train_XTX, diagonal=1), model), torch.tril(train_XTX, diagonal=0), upper=False)
		return model
Example #24
    def forward(self, x_train, y_train, x_test=None, classify=False):
        # See the autograd section for explanation of what happens here.

        self.classify = classify
        n = x_train.size(0)
        kyy = torch.empty(n, n)

        for i in range(n):
            for j in range(i, n):
                # integrate over the cov func
                out = self.int2D(self.cov_func, x_train[i, 0], x_train[i, 1],
                                 x_train[j, 0], x_train[j, 1], 1e-6)
                kyy[i, j] = out
                if i != j:
                    kyy[j, i] = out

        kyy = kyy + self.sigma_n.pow(2) * torch.eye(n)
        with torch.no_grad():
            e, _ = kyy.eig()
            mine = torch.min(e[:, 0])

        if mine < 1e-6:
            print('chol correction')
            kyy = kyy + 1.1 * (1e-6 - torch.eye(n) * mine).abs()
        c = torch.cholesky(kyy, upper=True)
        # v = torch.potrs(y_train, c, upper=True)
        v, _ = torch.gesv(y_train.unsqueeze(1), kyy)
        if x_test is None:
            out = (c, v)

        if x_test is not None:
            with torch.no_grad():
                ntest = x_test.size(0)
                kfy = torch.empty(ntest, n)
                for i in range(ntest):
                    for j in range(n):
                        # integrate over the cov func
                        out = self.int1D(lambda x: self.cov_func(x_test[i], x),
                                         x_train[j, 0], x_train[j, 1], 1e-6)
                        kfy[i, j] = out

                # solve
                f_test = kfy.mm(v)
                tmp = torch.potrs(kfy.t(), c, upper=True)
                tmp = torch.sum(kfy * tmp.t(), dim=1)
                cov_f = self.sigma_f.pow(2) - tmp
            out = (f_test, cov_f)
        return out
Example #25
def test_batch_cg():
    batch = 5
    size = 100
    matrix = torch.DoubleTensor(batch, size, size).normal_()
    matrix = matrix.matmul(matrix.transpose(-1, -2))
    matrix.div_(matrix.norm())
    matrix.add_(torch.DoubleTensor(matrix.size(-1)).fill_(1e-1).diag())

    rhs = torch.DoubleTensor(batch, size, 50).normal_()
    solves = linear_cg(matrix.matmul, rhs=rhs, max_iter=size)

    # Check cg
    matrix_chol = torch.cat([matrix[i].potrf().unsqueeze(0) for i in range(5)])
    actual = torch.cat(
        [torch.potrs(rhs[i], matrix_chol[i]).unsqueeze(0) for i in range(5)])
    assert approx_equal(solves, actual)
Example #26
    def covariance_propagation(self, input, Beta_a, lengthscale_a, variance_a, mu_a,
                                            Beta_b, lengthscale_b, variance_b, mu_b,
                                            mean, covariance):
        """

        :param input:  traing inputs N by D or N by E
        :param Beta_a:  cached Beta for output dim a,  1 by N
        :param lengthscale_a: legnth scale of the RBF kernel for output dim a, 1 by D
        :param Kff_inv_a: for output dim a ,N by N
        :param variance_a:  variance of the kernel for output dim a
        :param mu_a: prediction for the mean of GP under uncertain inputs for output dim a
        :param Beta_b: cached Beta for output dim b,  1 by N
        :param lengthscale_b: legnth scale of the RBF kernel for output dim b, 1 by D
        :param Kff_inv_b: for output dim b ,N by N
        :param variance_b: variance of the kernel for output dim b
        :param mu_b: prediction for the mean of GP under uncertain inputs for output dim b
        :param mean: mean for the uncertain inputs 1 by D or 1 by E
        :param covariance: covariance for the uncertain inputs D by D or E by E
        :return:
        """
        assert (input.size()[1] == mean.size()[1])

        # eq 12 of ref.[1]
        with torch.no_grad():

            mat1 = 1 / (1 / lengthscale_a + 1 / lengthscale_b).diag()
            R = mat1 + covariance
            det = (torch.det(R) ** -0.5) * (torch.det(mat1) ** 0.5)

            # N by 1 by D (E) -/+ N by D (E) = N by N by D (E)
            diff_m = (input.unsqueeze(1) - input) / 2.
            sum_m = (input.unsqueeze(1) * lengthscale_a + input * lengthscale_b) / (lengthscale_a + lengthscale_b)

            mat2 = R.potrf(upper=False)
            mat3 = torch.potrs(torch.eye(mat1.size()[0]), mat2, upper=False)

            # elementwise computation
            # N by N
            mat4 = ((diff_m ** 2 / (lengthscale_a + lengthscale_b)).sum(dim=-1)) * -0.5
            # N x N x 1 x D @ D x D @ N x N x D x 1 = N x N x 1 x 1(or D replaced by E) TODO MAYBE CONSIDER ADD SOME JITTER ?
            mat5 = sum_m - mean
            mat6 = (torch.matmul(mat5.unsqueeze(2), torch.matmul(mat3, mat5.unsqueeze(-1)))) * -0.5
            # N by N
            L = variance_a * variance_b * det * torch.mul(torch.exp(mat4), torch.exp(mat6.view(input.size()[0], input.size()[0])))
            cov = torch.matmul(Beta_a, torch.matmul(L, Beta_b)) - mu_a * mu_b
            return cov
Example #27
    def forward(self, x_train, y_train, x_test=None):
        # See the autograd section for explanation of what happens here.
        n = x_train.size(0)
        q1 = (x_train[:, 1].view(n, 1) - x_train[:, 0].view(n, 1).t()) / (
            2 * self.lengthscale.pow(2)).sqrt()
        q2 = (x_train[:, 1].view(n, 1) - x_train[:, 1].view(n, 1).t()) / (
            2 * self.lengthscale.pow(2)).sqrt()
        m1 = (x_train[:, 0].view(n, 1) - x_train[:, 0].view(n, 1).t()) / (
            2 * self.lengthscale.pow(2)).sqrt()
        m2 = (x_train[:, 0].view(n, 1) - x_train[:, 1].view(n, 1).t()) / (
            2 * self.lengthscale.pow(2)).sqrt()
        kyy = self.sigma_f.pow(
            2
        ) * (self.lengthscale.pow(2) * math.sqrt(math.pi) * ((
            (q1 * torch.erf(q1) + torch.exp(-q1.pow(2)) / math.sqrt(math.pi)) -
            (q2 * torch.erf(q2) + torch.exp(-q2.pow(2)) / math.sqrt(math.pi))
        ) + (
            (m2 * torch.erf(m2) + torch.exp(-m2.pow(2)) / math.sqrt(math.pi)) -
            (m1 * torch.erf(m1) + torch.exp(-m1.pow(2)) / math.sqrt(math.pi))))
             + self.sigma_n.pow(2) * torch.eye(n))

        #d = 0.5*(x_train - x_train.t()).pow(2)/self.lengthscale.pow(2)
        #kyy = self.sigma_f.pow(2)*torch.exp(-d) + self.sigma_n.pow(2) * torch.eye(n)

        c = torch.cholesky(kyy, upper=True)
        # v = torch.potrs(y_train, c, upper=True)
        v, _ = torch.gesv(y_train, kyy)  # kyy^-1 * y
        if x_test is None:
            out = (c, v)

        if x_test is not None:
            with torch.no_grad():
                kfy = ((math.sqrt(math.pi) / 2) * (torch.erf(
                    (x_train[:, 1].view(n, 1) - x_test.t()) /
                    math.sqrt(2 * self.lengthscale.pow(2))) - torch.erf(
                        (x_train[:, 0].view(n, 1) - x_test.t()) /
                        math.sqrt(2 * self.lengthscale.pow(2)))) *
                       math.sqrt(2 * self.lengthscale.pow(2)))
                kfy = self.sigma_f.pow(2) * kfy.t()
                # solve
                f_test = kfy.mm(v)
                tmp = torch.potrs(kfy.t(), c, upper=True)
                tmp = torch.sum(kfy * tmp.t(), dim=1)
                cov_f = self.sigma_f.pow(2) - tmp
            out = (f_test, cov_f)
        return out
Example #28
def bpotrs(b, u, upper=True, out=None):
    """Batch-solve a linear system of equations with a positive semidefinite
    matrix, given a batch of Cholesky factors of that matrix.
    (Workaround for "RuntimeError: the derivative for 'potri' is not implemented".)
    """

    s = u.size()  # (m, N, D, D)
    D = s[-1]

    b_view = b.view(-1, D)  # (mN, D)
    u_view = u.view((-1,) + s[-2:])
    c = Variable(b_view.data.new(b_view.size()))
    for i in range(c.size()[0]):
        c[i, :] = torch.potrs(b_view[i, :], u_view[i, :, :], upper)

    result = c.view(b.size())
    if out is not None:
        out.copy_(result)
    return result
Example #29
def woodbury_factor(low_rank_mat, shift):
    """
    Given a low rank (k x n) matrix V and a shift, returns the
    matrix R so that
        R = (I_k + 1/shift VV')^{-1}V
    to be used in solves with (V'V + shift I) via the Woodbury formula
    """
    k = low_rank_mat.size(-2)
    shifted_mat = low_rank_mat.matmul(low_rank_mat.transpose(-1, -2) / shift.unsqueeze(-1))

    shifted_mat = shifted_mat + torch.eye(k, dtype=shifted_mat.dtype, device=shifted_mat.device)

    if low_rank_mat.ndimension() == 3:
        R = batch_potrs(low_rank_mat, batch_potrf(shifted_mat))
    else:
        R = torch.potrs(low_rank_mat, shifted_mat.potrf())

    return R
Example #30
    def _output_mean(self, input, Beta, lengthscale, variance, mean, covariance):
        """
        mean of the prpagation of GP for uncertain inputs
        :param input: traing inputs H x (n+m)
        :param Beta: cached Beta 1 x H
        :param lengthscale: legnth scale of the RBF kernel  1 x (n + m)

        :param variance: variance of the kernel
        :param mean: mean for the uncertain inputs 1 x (n + m)
        :param covariance: covariance for the uncertain inputs (n + m) x (n + m)
        :return:
        """
        ### porediction of gp mean for uncertain inputs
        # print(input.size())
        # print(Beta.size())
        # print(lengthscale.size())
        # print(variance.size())
        # print(mean.size())
        # print(covariance.size())

        mean.requires_grad = True
        covariance.requires_grad = True

        assert(input.size()[1] == mean.size()[1])

        # eq 9 of ref. [1]
        #with torch.no_grad():
            #print(covariance)
        mat1 = (lengthscale.diag() + covariance)

        det = variance * (torch.det(mat1) ** -0.5) * (torch.det(lengthscale.diag()) ** 0.5)
        diff = input - mean
        # N x 1 x D @ D x D @ N x D x 1 = N x 1 x 1(or D replaced by E) TODO MAYBE CONSIDER ADD SOME JITTER ?
        mat2 = mat1.potrf(upper=False)
        mat3 = torch.potrs(torch.eye(mat1.size()[0]), mat2, upper=False)
        mat4 = (torch.matmul(diff.unsqueeze(1), torch.matmul(mat3, diff.unsqueeze(-1)))) * -0.5
        # (N, )
        l = det * torch.exp(mat4.view(-1))
        mu = torch.matmul(Beta, l)

        #TODO compute the gradient
        mu.backward()

        return mu, mean.grad.data, covariance.grad.data