Пример #1
0
 def _get_inv_logdet_cholesky(mat):
     try:
         L = np.linalg.cholesky(mat)
     except:
         L = np.linalg.cholesky(_eig_val_correction(mat, eps=1e-1))
     L_inv = np.linalg.inv(L)
     mat_inv = L_inv.T.dot(L_inv)
     mat_logdet = np.sum(np.log(np.diag(L))) * 2
     return mat_inv, mat_logdet
    def _vi_means_oracle(self, points, targets, params, ind_points):
        """
        Oracle function for 'vi' and 'means' methods.

        Evaluates the objective ``F_v`` and its gradient for the given
        hyper-parameters and inducing points. With
        ``B = sigma^2 I + K_nm K_mm^{-1} K_mn`` (never formed explicitly for
        the value), the objective is
        ``-log det(B)/2 - y^T B^{-1} y/2 - (n k(0, 0) - tr(Q_nn)) / (2 sigma^2)``.
        :param points: data points array; the number of points ``n`` is read
            from ``points.shape[1]``
        :param targets: target values vector (presumably an (n, 1) column --
            TODO confirm against callers)
        :param params: hyper-parameters vector; its last element is used as
            the noise level ``sigma``
        :param ind_points: inducing points, laid out like ``points``
        :return: tuple ``(F_v, gradient)``. ``gradient`` holds one entry per
            function returned by ``get_derivative_function_list``, then the
            derivative with respect to ``sigma``, and -- only when
            ``self.method == 'vi'`` -- one entry per inducing-point coordinate
        """
        n = points.shape[1]
        sigma = params[-1]
        # Work on a copy so that set_params does not modify self.covariance_obj.
        cov_obj = copy.deepcopy(self.covariance_obj)
        cov_obj.set_params(params)
        cov_fun = cov_obj.covariance_function
        K_mm = cov_fun(ind_points, ind_points)
        K_mm_l = np.linalg.cholesky(K_mm)
        K_mm_l_inv = np.linalg.inv(K_mm_l)
        K_mm_inv = K_mm_l_inv.T.dot(K_mm_l_inv)
        K_nm = cov_fun(points, ind_points)
        K_mn = K_nm.T
        K_mnK_nm = K_mn.dot(K_nm)
        Q_nn_tr = np.trace(K_mm_inv.dot(K_mnK_nm))
        # m x m matrix used to apply B^{-1} through the Woodbury identity.
        anc = K_mm + K_mnK_nm / sigma**2
        try:
            anc_l = np.linalg.cholesky(anc)
        except np.linalg.LinAlgError:
            # Only a failed factorization triggers the eigenvalue correction;
            # any other error propagates to the caller.
            print('Warning, matrix is not positive definite', params)
            anc_l = np.linalg.cholesky(_eig_val_correction(anc, eps=10))
        anc_l_inv = np.linalg.inv(anc_l)
        anc_inv = anc_l_inv.T.dot(anc_l_inv)
        K_mn_y = K_mn.dot(targets)
        y_B_inv_y = targets.T.dot(targets) / sigma**2 - K_mn_y.T.dot(
            anc_inv.dot(K_mn_y)) / sigma**4
        B_inv_y = targets / sigma**2 - K_nm.dot(
            anc_inv.dot(K_mn_y)) / sigma**4
        # log det(B) = log det(anc) + 2 n log(sigma) - log det(K_mm)
        B_log_det = (np.sum(np.log(np.diag(anc_l))) + n * np.log(sigma) -
                     np.sum(np.log(np.diag(K_mm_l)))) * 2
        # The covariance of a single point with itself is evaluated at the
        # origin only; this assumes every diagonal entry of K_nn equals
        # k(0, 0) -- TODO confirm the kernel is stationary.
        zero = np.array([[0]])
        K_nn_diag = cov_fun(zero, zero)
        F_v = - B_log_det/2 - y_B_inv_y/2 - \
              (K_nn_diag * n - Q_nn_tr) / (2 * sigma**2)

        # Gradient
        gradient = []

        derivative_matrix_list = cov_obj.get_derivative_function_list(params)
        A = anc_inv
        # Products that do not depend on the hyper-parameter being
        # differentiated are computed once and shared by all derivatives.
        K_mn_B_inv_y = K_mn.dot(B_inv_y)
        K_mm_inv_K_mnK_nm = K_mm_inv.dot(K_mnK_nm)
        A_K_mnK_nm = A.dot(K_mnK_nm)
        for func in derivative_matrix_list:
            dK_nm = func(points, ind_points)
            dK_mn = dK_nm.T
            dK_mm = func(ind_points, ind_points)
            # d(K_mm^{-1}) = -K_mm^{-1} dK_mm K_mm^{-1}
            dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
            K_mndK_nm = K_mn.dot(dK_nm)
            # Trace of d(K_nm K_mm^{-1} K_mn); the two outer terms have equal
            # traces, hence the factor 2.
            dB_dtheta_tr = 2 * np.trace(K_mm_inv.dot(K_mndK_nm)) + np.trace(
                dK_mm_inv.dot(K_mnK_nm))
            dB_B_inv_y = (dK_nm.dot(K_mm_inv.dot(K_mn_B_inv_y))
                          + K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
                          + K_nm.dot(K_mm_inv.dot(dK_mn.dot(B_inv_y))))
            y_B_inv_dB_B_inv_y = B_inv_y.T.dot(dB_B_inv_y)
            B_inv_dB_tr = dB_dtheta_tr / sigma**2 - (
                2 * np.trace(A.dot(K_mndK_nm).dot(K_mm_inv_K_mnK_nm))
                + np.trace(A_K_mnK_nm.dot(dK_mm_inv.dot(K_mnK_nm)))
            ) / sigma**4

            dK_nn = func(zero, zero)
            gradient.append((-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2 -
                             (dK_nn * n - dB_dtheta_tr) / (2 * sigma**2))[0, 0])

        # sigma derivative
        # Dependence of K_mm on sigma, as reported by get_noise_derivative.
        dK_mm = cov_obj.get_noise_derivative(K_mm.shape[0])
        dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))

        dQ_dtheta_tr = np.trace(dK_mm_inv.dot(K_mnK_nm))
        dQ_B_inv_y = K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
        y_B_inv_dQ_B_inv_y = B_inv_y.T.dot(dQ_B_inv_y)
        # dB/dsigma = 2 sigma I + dQ/dsigma
        y_B_inv_dB_B_inv_y = 2 * sigma * B_inv_y.T.dot(
            B_inv_y) + y_B_inv_dQ_B_inv_y
        dB_dtheta_tr = 2 * sigma * n + dQ_dtheta_tr
        B_inv_dB_tr = dB_dtheta_tr / sigma**2 - \
                      (np.trace(A_K_mnK_nm.dot(dK_mm_inv).dot(K_mnK_nm))
                       + 2 * sigma * np.trace(A_K_mnK_nm))/sigma**4
        dK_nn = 2 * sigma
        # The last summand differentiates the explicit 1 / (2 sigma^2) factor
        # of the trace term in F_v.
        gradient.append((-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2 -
                         (n * dK_nn - dQ_dtheta_tr) / (2 * sigma**2))[0, 0] +
                        (n * K_nn_diag[0, 0] - Q_nn_tr) / sigma**3)

        # inducing points derivatives
        # By now this is not written in an optimal way: the dense n x n
        # matrix B^{-1} is formed and one n x n derivative is built per
        # inducing-point coordinate.
        if self.method == 'vi':
            B_inv = np.eye(n) / sigma**2 - K_nm.dot(
                anc_inv.dot(K_mn)) / sigma**4
            K_mn_derivatives = cov_obj.covariance_derivative(
                ind_points, points)
            K_mm_derivatives = cov_obj.covariance_derivative(
                ind_points, ind_points)
            # presumably [j, i, :] is the derivative of row i with respect to
            # coordinate j of inducing point i -- verify against
            # covariance_derivative
            for j in range(ind_points.shape[0]):
                for i in range(ind_points.shape[1]):
                    # Moving inducing point i changes only row i of K_mn ...
                    dK_mn = np.zeros(K_mn.shape)
                    dK_mn[i, :] = K_mn_derivatives[j, i, :]
                    dK_nm = dK_mn.T
                    # ... and row i together with column i of K_mm.
                    dK_mm = np.zeros(K_mm.shape)
                    dK_mm[i, :] = K_mm_derivatives[j, i, :]
                    dK_mm[:, i] = K_mm_derivatives[j, i, :].T
                    dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
                    dB_dtheta = (dK_nm.dot(K_mm_inv) +
                                 K_nm.dot(dK_mm_inv)).dot(K_mn) + K_nm.dot(
                                     K_mm_inv.dot(dK_mn))
                    dK_nn = 0
                    # NOTE(review): dB_dtheta is passed for two consecutive
                    # arguments -- confirm against the helper's signature.
                    gradient.append(
                        self._vi_lower_bound_partial_derivative(
                            targets, dB_dtheta, dB_dtheta, B_inv, sigma,
                            dK_nn))
        return F_v[0, 0], np.array(gradient)
    def _vi_means_oracle(self, points, targets, params, ind_points):
        """
        Oracle function for 'vi' and 'means' methods.

        Evaluates the objective ``F_v`` and its gradient for the given
        hyper-parameters and inducing points. With
        ``B = sigma^2 I + K_nm K_mm^{-1} K_mn`` (never formed explicitly for
        the value), the objective is
        ``-log det(B)/2 - y^T B^{-1} y/2 - (n k(0, 0) - tr(Q_nn)) / (2 sigma^2)``.
        :param points: data points array; the number of points ``n`` is read
            from ``points.shape[1]``
        :param targets: target values vector (presumably an (n, 1) column --
            TODO confirm against callers)
        :param params: hyper-parameters vector; its last element is used as
            the noise level ``sigma``
        :param ind_points: inducing points, laid out like ``points``
        :return: tuple ``(F_v, gradient)``. ``gradient`` holds one entry per
            function returned by ``get_derivative_function_list``, then the
            derivative with respect to ``sigma``, and -- only when
            ``self.method == 'vi'`` -- one entry per inducing-point coordinate
        """
        n = points.shape[1]
        sigma = params[-1]
        # Work on a copy so that set_params does not modify self.covariance_obj.
        cov_obj = copy.deepcopy(self.covariance_obj)
        cov_obj.set_params(params)
        cov_fun = cov_obj.covariance_function
        K_mm = cov_fun(ind_points, ind_points)
        K_mm_l = np.linalg.cholesky(K_mm)
        K_mm_l_inv = np.linalg.inv(K_mm_l)
        K_mm_inv = K_mm_l_inv.T.dot(K_mm_l_inv)
        K_nm = cov_fun(points, ind_points)
        K_mn = K_nm.T
        K_mnK_nm = K_mn.dot(K_nm)
        Q_nn_tr = np.trace(K_mm_inv.dot(K_mnK_nm))
        # m x m matrix used to apply B^{-1} through the Woodbury identity.
        anc = K_mm + K_mnK_nm / sigma**2
        try:
            anc_l = np.linalg.cholesky(anc)
        except np.linalg.LinAlgError:
            # Only a failed factorization triggers the eigenvalue correction;
            # any other error propagates to the caller.
            print('Warning, matrix is not positive definite', params)
            anc_l = np.linalg.cholesky(_eig_val_correction(anc, eps=10))
        anc_l_inv = np.linalg.inv(anc_l)
        anc_inv = anc_l_inv.T.dot(anc_l_inv)
        K_mn_y = K_mn.dot(targets)
        y_B_inv_y = targets.T.dot(targets) / sigma**2 - K_mn_y.T.dot(
            anc_inv.dot(K_mn_y)) / sigma**4
        B_inv_y = targets / sigma**2 - K_nm.dot(
            anc_inv.dot(K_mn_y)) / sigma**4
        # log det(B) = log det(anc) + 2 n log(sigma) - log det(K_mm)
        B_log_det = (np.sum(np.log(np.diag(anc_l))) + n * np.log(sigma) -
                     np.sum(np.log(np.diag(K_mm_l)))) * 2
        # The covariance of a single point with itself is evaluated at the
        # origin only; this assumes every diagonal entry of K_nn equals
        # k(0, 0) -- TODO confirm the kernel is stationary.
        zero = np.array([[0]])
        K_nn_diag = cov_fun(zero, zero)
        F_v = - B_log_det/2 - y_B_inv_y/2 - \
              (K_nn_diag * n - Q_nn_tr) / (2 * sigma**2)

        # Gradient
        gradient = []

        derivative_matrix_list = cov_obj.get_derivative_function_list(params)
        A = anc_inv
        # Products that do not depend on the hyper-parameter being
        # differentiated are computed once and shared by all derivatives.
        K_mn_B_inv_y = K_mn.dot(B_inv_y)
        K_mm_inv_K_mnK_nm = K_mm_inv.dot(K_mnK_nm)
        A_K_mnK_nm = A.dot(K_mnK_nm)
        for func in derivative_matrix_list:
            dK_nm = func(points, ind_points)
            dK_mn = dK_nm.T
            dK_mm = func(ind_points, ind_points)
            # d(K_mm^{-1}) = -K_mm^{-1} dK_mm K_mm^{-1}
            dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
            K_mndK_nm = K_mn.dot(dK_nm)
            # Trace of d(K_nm K_mm^{-1} K_mn); the two outer terms have equal
            # traces, hence the factor 2.
            dB_dtheta_tr = 2 * np.trace(K_mm_inv.dot(K_mndK_nm)) + np.trace(
                dK_mm_inv.dot(K_mnK_nm))
            dB_B_inv_y = (dK_nm.dot(K_mm_inv.dot(K_mn_B_inv_y))
                          + K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
                          + K_nm.dot(K_mm_inv.dot(dK_mn.dot(B_inv_y))))
            y_B_inv_dB_B_inv_y = B_inv_y.T.dot(dB_B_inv_y)
            B_inv_dB_tr = dB_dtheta_tr / sigma**2 - (
                2 * np.trace(A.dot(K_mndK_nm).dot(K_mm_inv_K_mnK_nm))
                + np.trace(A_K_mnK_nm.dot(dK_mm_inv.dot(K_mnK_nm)))
            ) / sigma**4

            dK_nn = func(zero, zero)
            gradient.append((-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2 -
                             (dK_nn * n - dB_dtheta_tr) / (2 * sigma**2))[0, 0])

        # sigma derivative
        # Dependence of K_mm on sigma, as reported by get_noise_derivative.
        dK_mm = cov_obj.get_noise_derivative(K_mm.shape[0])
        dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))

        dQ_dtheta_tr = np.trace(dK_mm_inv.dot(K_mnK_nm))
        dQ_B_inv_y = K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
        y_B_inv_dQ_B_inv_y = B_inv_y.T.dot(dQ_B_inv_y)
        # dB/dsigma = 2 sigma I + dQ/dsigma
        y_B_inv_dB_B_inv_y = 2 * sigma * B_inv_y.T.dot(
            B_inv_y) + y_B_inv_dQ_B_inv_y
        dB_dtheta_tr = 2 * sigma * n + dQ_dtheta_tr
        B_inv_dB_tr = dB_dtheta_tr / sigma**2 - \
                      (np.trace(A_K_mnK_nm.dot(dK_mm_inv).dot(K_mnK_nm))
                       + 2 * sigma * np.trace(A_K_mnK_nm))/sigma**4
        dK_nn = 2 * sigma
        # The last summand differentiates the explicit 1 / (2 sigma^2) factor
        # of the trace term in F_v.
        gradient.append((-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2 -
                         (n * dK_nn - dQ_dtheta_tr) / (2 * sigma**2))[0, 0] +
                        (n * K_nn_diag[0, 0] - Q_nn_tr) / sigma**3)

        # inducing points derivatives
        # By now this is not written in an optimal way: the dense n x n
        # matrix B^{-1} is formed and one n x n derivative is built per
        # inducing-point coordinate.
        if self.method == 'vi':
            B_inv = np.eye(n) / sigma**2 - K_nm.dot(
                anc_inv.dot(K_mn)) / sigma**4
            K_mn_derivatives = cov_obj.covariance_derivative(
                ind_points, points)
            K_mm_derivatives = cov_obj.covariance_derivative(
                ind_points, ind_points)
            # presumably [j, i, :] is the derivative of row i with respect to
            # coordinate j of inducing point i -- verify against
            # covariance_derivative
            for j in range(ind_points.shape[0]):
                for i in range(ind_points.shape[1]):
                    # Moving inducing point i changes only row i of K_mn ...
                    dK_mn = np.zeros(K_mn.shape)
                    dK_mn[i, :] = K_mn_derivatives[j, i, :]
                    dK_nm = dK_mn.T
                    # ... and row i together with column i of K_mm.
                    dK_mm = np.zeros(K_mm.shape)
                    dK_mm[i, :] = K_mm_derivatives[j, i, :]
                    dK_mm[:, i] = K_mm_derivatives[j, i, :].T
                    dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
                    dB_dtheta = (dK_nm.dot(K_mm_inv) +
                                 K_nm.dot(dK_mm_inv)).dot(K_mn) + K_nm.dot(
                                     K_mm_inv.dot(dK_mn))
                    dK_nn = 0
                    # NOTE(review): dB_dtheta is passed for two consecutive
                    # arguments -- confirm against the helper's signature.
                    gradient.append(
                        self._vi_lower_bound_partial_derivative(
                            targets, dB_dtheta, dB_dtheta, B_inv, sigma,
                            dK_nn))
        return F_v[0, 0], np.array(gradient)