def test_kernel_gram_matrix(self):
    """Check ``dpp.K`` against inner products of evaluated polynomials.

    Writing ``phi_x`` for ``dpp.eval_poly_multiD(x, normalize='norm')``,
    the assertions below verify that

        K(x) == phi_x.dot(phi_x)
        K(x, y) == phi_x.dot(phi_y)
        K(x, Y) == [phi_x.dot(phi_y) for y in Y]
        K(X) == [phi_x.dot(phi_x) for x in X]
        K(X, Y) == [phi_x.dot(phi_y) for x, y in zip(X, Y)]

    for every dimension ``d`` in ``2, 3, 4``.
    """
    N = 100

    for dim in np.arange(2, 5):
        # Random parameters in (-0.5, 0.5], first row forced to -0.5
        params = 0.5 - np.random.rand(dim, 2)
        params[0, :] = -0.5
        ope = MultivariateJacobiOPE(N, params)

        # Single points and their feature vectors
        x, y = np.random.rand(dim), np.random.rand(dim)
        feat_x = ope.eval_poly_multiD(x, normalize='norm')
        feat_y = ope.eval_poly_multiD(y, normalize='norm')

        # Batches of 5 points; the evaluation is transposed before use
        X, Y = np.random.rand(5, dim), np.random.rand(5, dim)
        feat_X = ope.eval_poly_multiD(X, normalize='norm').T
        feat_Y = ope.eval_poly_multiD(Y, normalize='norm').T

        # One argument: K(x, x)
        same_point = np.allclose(ope.K(x), inner1d(feat_x))
        self.assertTrue(same_point)
        same_batch = np.allclose(ope.K(X), inner1d(feat_X))
        self.assertTrue(same_batch)

        # Two arguments: point/point, point/batch, batch/batch
        point_point = np.allclose(ope.K(x, y), inner1d(feat_x, feat_y))
        self.assertTrue(point_point)
        point_batch = np.allclose(ope.K(x, Y), feat_x.dot(feat_Y))
        self.assertTrue(point_batch)
        batch_batch = np.allclose(ope.K(X, Y), inner1d(feat_X, feat_Y))
        self.assertTrue(batch_batch)
def proj_dpp_sampler_eig_KuTa12(eig_vecs, size=None):
    r""" Sample from :math:`\operatorname{DPP}(K)` using the eigendecomposition of the similarity kernel :math:`K`.
    It is based on the orthogonalization of the selected eigenvectors.

    :param eig_vecs:
        Selected eigenvectors of the similarity kernel :math:`K`, stored as
        the columns of a 2D array with one row per item of the ground set.
        The array is copied, the caller's data is left untouched.
    :type eig_vecs:
        array_like

    :param size:
        Number of items to draw.
        Default is the number of columns of ``eig_vecs``.
    :type size:
        int

    :return:
        A sample from :math:`\operatorname{DPP}(K)`, as an integer array of
        item indices in the order they were drawn.
    :rtype:
        array_like

    .. seealso::

        - :cite:`KuTa12` Algorithm 1
        - :func:`proj_dpp_sampler_eig_GS <proj_dpp_sampler_eig_GS>`
        - :func:`proj_dpp_sampler_eig_GS_bis <proj_dpp_sampler_eig_GS_bis>`
    """

    # Initialization
    V = eig_vecs.copy()

    N, rank = V.shape  # ground set size / rank(K)
    size = rank if size is None else size  # Full projection DPP or k-DPP
    sampl = np.zeros(size, dtype=int)  # sample list

    # Phase 1: Already performed!
    # Select eigvecs with Bernoulli variables with parameter the eigvals

    # Phase 2: Chain rule
    norms_2 = inner1d(V, axis=1)  # ||V_i:||^2

    # Following [Algo 1, KuTa12], the aim is to compute the orthogonal
    # complement of the subspace spanned by the selected eigenvectors to the
    # canonical vectors \{e_i ; i \in Y\}. We proceed recursively.
    for it in range(size):

        j = np.random.choice(N, size=1, p=np.abs(norms_2) / (rank - it))[0]
        sampl[it] = j
        if it == size - 1:
            break  # nothing left to update after the last draw

        # Cancel the contribution of e_j to the remaining vectors, that is,
        # find the subspace of V that is orthogonal to \{e_i ; i \in Y\}.

        # Pivot on the column with the LARGEST contribution on e_j.
        # Any column with a non null entry V[j, k] spans the same remaining
        # subspace, but dividing by a tiny entry amplifies rounding errors.
        k = np.argmax(np.abs(V[j, :]))

        # Cancel the contribution of the remaining vectors on e_j, but stay
        # in the subspace spanned by V: row j and column k of V become 0.
        V -= np.outer(V[:, k] / V[j, k], V[j, :])

        # Column k is now null: drop it and re-orthonormalize the basis of
        # the subspace under consideration
        V, _ = la.qr(np.delete(V, k, axis=1), mode='economic')

        norms_2 = inner1d(V, axis=1)  # ||V_i:||^2

    return sampl
def test_inner_products_and_square_norms(self):
    """``utils.inner1d`` must match explicit sums along each axis of a 4D array."""
    X = rndm.rand(10, 20, 30, 40)
    Y = rndm.rand(*X.shape)

    for axis in range(X.ndim):
        # Two arrays: inner product along ``axis``
        dot_xy = utils.inner1d(X, Y, axis=axis)
        ref_dot = (X * Y).sum(axis=axis)
        self.assertTrue(np.allclose(dot_xy, ref_dot))

        # One array: square norm along ``axis``
        sq_norm = utils.inner1d(X, axis=axis)
        ref_norm = (X**2).sum(axis=axis)
        self.assertTrue(np.allclose(sq_norm, ref_norm))
def proj_dpp_sampler_eig_GS(eig_vecs, size=None, random_state=None):
    r""" Sample from projection :math:`\operatorname{DPP}(K)` using the eigendecomposition of the projection kernel :math:`K=VV^{\top}` where :math:`V^{\top}V = I_r` and :math:`r=\operatorname{rank}(\mathbf{K})`.
    It performs sequential update of Cholesky decomposition, which is equivalent to Gram-Schmidt orthogonalization of the rows of the eigenvectors.

    :param eig_vecs:
        Eigenvectors used to form projection kernel :math:`K=VV^{\top}`.
        The array is only read, never modified.
    :type eig_vecs:
        array_like

    :param size:
        Number of items to draw.
        Default is the number of columns of ``eig_vecs``.
    :type size:
        int

    :param random_state:
        Passed to ``check_random_state`` to obtain the random generator used
        for the draws.

    :return:
        A sample from projection :math:`\operatorname{DPP}(K)`, as a list of
        item indices in the order they were drawn.
    :rtype:
        list

    .. seealso::

        - :cite:`TrBaAm18` Algorithm 3, :cite:`Gil14` Algorithm 2
        - :func:`proj_dpp_sampler_eig_GS_bis <proj_dpp_sampler_eig_GS_bis>`
        - :func:`proj_dpp_sampler_eig_KuTa12 <proj_dpp_sampler_eig_KuTa12>`
    """

    rng = check_random_state(random_state)

    V = eig_vecs
    nb_items, rank = V.shape  # ground set size / rank(K)

    # Full projection DPP when size is None, k-DPP with k = size otherwise
    nb_draws = rank if size is None else size

    items = np.arange(nb_items)
    sample = np.zeros(nb_draws, dtype=int)
    is_avail = np.ones(nb_items, dtype=bool)

    # Phase 1 (selection of the eigenvectors) is assumed already performed.

    # Phase 2: chain rule.
    # coefs[i, t] stores the Gram-Schmidt coefficient of row i against the
    # t-th selected row; res_norms[i] is the residual squared norm of row i.
    coefs = np.zeros((nb_items, nb_draws))
    res_norms = inner1d(V, axis=1)  # ||V_i:||^2

    last_step = nb_draws - 1
    for step in range(nb_draws):

        # Draw an available item proportionally to its residual norm^2
        probs = np.abs(res_norms[is_avail]) / (rank - step)
        j = rng.choice(items[is_avail], p=probs)
        sample[step] = j

        if step == last_step:
            break  # no update needed after the final draw

        # Cancel the contribution of V_j to the remaining feature vectors
        is_avail[j] = False
        gram_j = V[is_avail, :].dot(V[j, :])
        already_removed = coefs[is_avail, :step].dot(coefs[j, :step])
        coefs[is_avail, step] = (gram_j - already_removed)\
            / np.sqrt(res_norms[j])

        # Update the residual norm^2 of the items still available
        res_norms[is_avail] -= coefs[is_avail, step]**2

    return sample.tolist()
def test_inner1D_to_compute_inner_product_and_square_norms(self):
    """Compare ``utils.inner1d`` with explicit sums, one sub-test per case.

    For each axis of two random 4D arrays, the inner product
    (two arguments) and the square norm (one argument) are checked in
    separate nested sub-tests.
    """
    dims = (10, 20, 30, 40)
    X, Y = rndm.rand(*dims), rndm.rand(*dims)

    for ax in range(len(dims)):
        with self.subTest(axis=ax):

            for test_inner1D in ('inner_prod', 'sq_norm'):
                with self.subTest(test_inner1D=test_inner1D):

                    if test_inner1D == 'inner_prod':
                        computed = utils.inner1d(X, Y, axis=ax)
                        expected = (X * Y).sum(axis=ax)
                    else:  # 'sq_norm'
                        computed = utils.inner1d(X, axis=ax)
                        expected = (X**2).sum(axis=ax)

                    self.assertTrue(np.allclose(computed, expected))
def proj_dpp_sampler_eig_GS_bis(eig_vecs, size=None):
    r""" Sample from projection :math:`\operatorname{DPP}(K)` using the eigendecomposition of the projection kernel :math:`K=VV^{\top}` where :math:`V^{\top}V = I_r` and :math:`r=\operatorname{rank}(\mathbf{K})`.
    It performs sequential Gram-Schmidt orthogonalization of the rows of the eigenvectors.

    :param eig_vecs:
        Eigenvectors used to form projection kernel :math:`K=VV^{\top}`.
        The array is copied, the caller's data is left untouched.
    :type eig_vecs:
        array_like

    :param size:
        Number of items to draw.
        Default is the number of columns of ``eig_vecs``.
    :type size:
        int

    :return:
        A sample from projection :math:`\operatorname{DPP}(K)`, as an integer
        array of item indices in the order they were drawn.
    :rtype:
        array_like

    .. seealso::

        - This is a slight modification of :func:`proj_dpp_sampler_eig_GS <proj_dpp_sampler_eig_GS>`
        - :func:`proj_dpp_sampler_eig_KuTa12 <proj_dpp_sampler_eig_KuTa12>`
    """

    # Work on a copy: the selected rows are overwritten in place below
    V = eig_vecs.copy()
    nb_items, rank = V.shape  # ground set size / rank(K)

    # Full projection DPP when size is None, k-DPP with k = size otherwise
    nb_draws = rank if size is None else size

    items = np.arange(nb_items)
    sample = np.zeros(nb_draws, dtype=int)
    is_avail = np.ones(nb_items, dtype=bool)

    # Phase 1 (selection of the eigenvectors) is assumed already performed.

    # Phase 2: chain rule via Gram-Schmidt.
    # inner[i, t] = <V_i, P_{V_Y}^{orthog} V_j> with j the t-th selected item
    inner = np.zeros((nb_items, nb_draws))
    res_norms = inner1d(V, axis=1)  # ||V_i:||^2

    for step in range(nb_draws):

        # Draw an available item proportionally to its residual norm^2
        #   ||P_{V_Y}^{orthog} V_j||^2
        probs = np.abs(res_norms[is_avail]) / (rank - step)
        j = np.random.choice(items[is_avail], p=probs)
        sample[step] = j

        if step == nb_draws - 1:
            break  # no update needed after the final draw

        is_avail[j] = False
        previous = sample[:step]

        # 1) Orthogonalize V_j w.r.t. the rows already selected
        #      V'_j = V_j - sum_Y <V_j, V'_k> V'_k
        #    (the stored V'_k were divided by their norm^2 in step 3)
        V[j, :] -= inner[j, :step].dot(V[previous, :])

        # 2) <V_i, V'_j> for every item i still available
        inner[is_avail, step] = V[is_avail, :].dot(V[j, :])

        # 3) Scale V'_j by its norm^2 (not its norm), in preparation for
        #    the orthogonalization 1) of the next iterations
        V[j, :] /= res_norms[j]

        # 4) Update the residual norm^2
        #      |P_{V_Y+j}^{orthog} V_i|^2
        #        = |P_{V_Y}^{orthog} V_i|^2
        #          - <V_i, P_{V_Y}^{orthog} V_j>^2 / |P_{V_Y}^{orthog} V_j|^2
        res_norms[is_avail] -= inner[is_avail, step]**2 / res_norms[j]

    return sample
def proj_dpp_sampler_kernel_Schur(K, size=None):
    r""" Sample from:

    - :math:`\operatorname{DPP}(K)` with orthogonal projection **correlation** kernel :math:`K` if ``size`` is not provided
    - :math:`\operatorname{k-DPP}` with orthogonal projection **likelihood** kernel :math:`K` with :math:`k=` ``size``

    Chain rule is applied by computing the Schur complements.

    :param K:
        Orthogonal projection kernel.
    :type K:
        array_like

    :param size:
        Size of the sample.
        Default is :math:`k=\operatorname{Tr}(K)=\operatorname{rank}(K)`.
    :type size:
        int

    :return:
        If ``size`` is not provided (None),
        a sample from :math:`\operatorname{DPP}(K)`.
        If ``size`` is provided,
        a sample from :math:`\operatorname{k-DPP}(K)`.
        Items are returned as an integer array, in the order they were drawn.
    :rtype:
        array_like

    .. seealso::

        - :func:`proj_dpp_sampler_kernel_GS <proj_dpp_sampler_kernel_GS>`
    """

    # Ground set size, and rank(K) read off the rounded trace
    nb_items = len(K)
    rank = np.round(np.trace(K)).astype(int)

    if size is None:  # full projection DPP, otherwise k-DPP with k = size
        size = rank

    items = np.arange(nb_items)
    sample = np.zeros(size, dtype=int)
    is_avail = np.ones(nb_items, dtype=bool)

    # Schur complements K_ii - K_iY (K_Y)^-1 K_Yi, i.e. residual norm^2
    residuals = K.diagonal().copy()
    # Top-left (t x t) corner holds K[Y, Y]^-1 once t items are selected
    K_inv = np.zeros((size, size))

    for step in range(size):

        # Draw an available item proportionally to its residual norm^2
        probs = np.abs(residuals[is_avail]) / (rank - step)
        j = np.random.choice(items[is_avail], p=probs)

        # Store the item, then make it unavailable
        sample[step] = j
        is_avail[j] = False

        # 1) Grow K[Y, Y]^-1 into K[Y+j, Y+j]^-1 (block inversion):
        #    [ K_Y^-1 + (K_Y^-1 K_Yj K_jY K_Y^-1) / schur_j,
        #                                   -K_Y^-1 K_Yj / schur_j ]
        #    [ -K_jY K_Y^-1 / schur_j,      1 / schur_j            ]
        if step == 0:
            K_inv[0, 0] = 1.0 / K[j, j]

        elif step == 1:
            # Closed form inverse of the 2 x 2 matrix K[[i, j], [i, j]]
            i = sample[0]
            det = K[i, i] * K[j, j] - K[j, i]**2
            adjugate = np.array([[K[j, j], -K[j, i]],
                                 [-K[j, i], K[i, i]]])
            K_inv[:2, :2] = adjugate / det

        elif step < size - 1:
            Y = sample[:step]
            w = K_inv[:step, :step].dot(K[Y, j])  # K_Y^-1 K_Yj
            schur_j = K[j, j] - K[j, Y].dot(w)  # K_jj - K_jY K_Y^-1 K_Yj

            K_inv[:step, :step] += np.outer(w, w / schur_j)
            K_inv[:step, step] = -w / schur_j
            K_inv[step, :step] = K_inv[:step, step]
            K_inv[step, step] = 1.0 / schur_j

        else:  # step == size - 1
            break  # no need to update for nothing

        # 2) Refresh the Schur complements of the available items
        #    K_ii - K_iY (K_Y)^-1 K_Yi  for Y <- Y+j
        K_iY = K[np.ix_(is_avail, sample[:step + 1])]
        quad_form = inner1d(K_iY.dot(K_inv[:step + 1, :step + 1]),
                            K_iY,
                            axis=1)
        # The same boolean mask on both axes picks the diagonal entries K_ii
        residuals[is_avail] = K[is_avail, is_avail] - quad_form

    return sample
def proj_dpp_sampler_kernel_Schur(K, size=None):
    r""" Sample from:

    - :math:`\operatorname{DPP}(K)` with orthogonal projection **correlation** kernel :math:`K` if ``size`` is not provided
    - :math:`\operatorname{k-DPP}` with orthogonal projection **likelihood** kernel :math:`K` with :math:`k=` ``size``

    Chain rule is applied by computing the Schur complements.

    :param K:
        Orthogonal projection kernel.
    :type K:
        array_like

    :param size:
        Size of the sample.
        Default is :math:`k=\operatorname{Tr}(K)=\operatorname{rank}(K)`.
    :type size:
        int

    :return:
        If ``size`` is not provided (None),
        a sample from :math:`\operatorname{DPP}(K)`.
        If ``size`` is provided,
        a sample from :math:`\operatorname{k-DPP}(K)`.
        Items are returned as an integer array, in the order they were drawn.
    :rtype:
        array_like

    .. seealso::

        - :func:`proj_dpp_sampler_kernel_GS <proj_dpp_sampler_kernel_GS>`
    """

    # Initialization
    # ground set size / rank(K) = Tr(K)
    N, rank = K.shape[0], int(np.round(np.trace(K)))
    ground_set = np.arange(N)

    size = rank if size is None else size  # Full projection DPP or k-DPP
    sampl = np.zeros(size, dtype=int)  # sample list
    avail = np.ones(N, dtype=bool)  # available items

    K_diag = K.diagonal()
    # Schur complement list i.e. residual norm^2
    schur_comp = K_diag.copy()

    for it in range(size):

        # Pick a new item proportionally to residual norm^2
        j = np.random.choice(ground_set[avail],
                             size=1,
                             p=np.abs(schur_comp[avail]) / (rank - it))[0]

        # store the item and make it unavailable
        sampl[it], avail[j] = j, False

        # Update Schur complements K_ii - K_iY (K_Y)^-1 K_Yi
        #
        # 1) use Woodbury identity to update K[Y,Y]^-1 to K[Y+j,Y+j]^-1
        # K[Y+j,Y+j]^-1 =
        # [ K[Y,Y]^-1 + (K[Y,Y]^-1 K[Y,j] K[j,Y] K[Y,Y]^-1)/schur_j,
        #      -K[Y,Y]^-1 K[Y,j]/schur_j]
        # [ -K[j,Y] K[Y,Y]^-1/schur_j,
        #      1/schur_j]

        if it == 0:
            # Scalar inverse; ``K_iY.dot(K_inv)`` below scales the column
            K_inv = 1.0 / K[j, j]

        elif it == 1:
            # Closed form inverse of the 2 x 2 matrix K[[Y, j], [Y, j]]
            Y = sampl[0]
            K_inv = np.array([[K[j, j], -K[j, Y]],
                              [-K[j, Y], K[Y, Y]]])
            K_inv /= K[Y, Y] * K[j, j] - K[j, Y]**2

        elif it < size - 1:
            Y = sampl[:it]
            temp = K_inv.dot(K[Y, j])  # K_Y^-1 K_Yj
            schur_j = K[j, j] - K[j, Y].dot(temp)  # K_jj - K_jY K_Y^-1 K_Yj

            # Append one row and one column filled with 1/schur_j.
            # ``np.pad`` is the public entry point; the ``np.lib.pad`` alias
            # is no longer exposed since NumPy 2.0.
            K_inv = np.pad(K_inv, (0, 1),
                           'constant',
                           constant_values=1.0 / schur_j)

            K_inv[:-1, :-1] += np.outer(temp, temp / schur_j)
            # Last column currently holds 1/schur_j -> -K_Y^-1 K_Yj / schur_j
            K_inv[:-1, -1] *= -temp
            K_inv[-1, :-1] = K_inv[:-1, -1]
            # K_inv[-1, -1] keeps the padded value 1/schur_j

        else:  # it == size-1
            break  # no need to update for nothing

        # 2) update Schur complements
        # K_ii - K_iY (K_Y)^-1 K_Yi for Y <- Y+j
        K_iY = K[np.ix_(avail, sampl[:it + 1])]
        schur_comp[avail] =\
            K_diag[avail] - inner1d(K_iY.dot(K_inv), K_iY, axis=1)

    return sampl
def K(self, X, Y=None, eval_pointwise=False):
    r"""Evaluate :math:`\left(K(x, y)\right)_{x\in X, y\in Y}` if ``eval_pointwise=False`` or :math:`\left(K(x, y)\right)_{(x, y)\in (X, Y)}` otherwise

    .. math::

        K(x, y) = \sum_{\mathfrak{b}(k)=0}^{N-1} P_{k}(x) P_{k}(y)
                = \phi(x)^{\top} \phi(y)

    where

    - :math:`k \in \mathbb{N}^d` is a multi-index ordered according to the ordering :math:`\mathfrak{b}`, :py:meth:`compute_ordering`

    - :math:`P_{k}(x) = \prod_{i=1}^d P_{k_i}^{(a_i, b_i)}(x_i)` is the product of orthonormal Jacobi polynomials

        .. math::

            \int_{-1}^{1}
                P_{k}^{(a_i,b_i)}(u) P_{\ell}^{(a_i,b_i)}(u)
                (1-u)^{a_i} (1+u)^{b_i} d u
            = \delta_{k\ell}

      so that :math:`(P_{k})` are orthonormal w.r.t :math:`\mu(dx)`

    - :math:`\Phi(x) = \left(P_{\mathfrak{b}^{-1}(0)}(x), \dots, P_{\mathfrak{b}^{-1}(N-1)}(x) \right)`, see :py:meth:`eval_multiD_polynomials`

    :param X:
        :math:`M\times d` array of :math:`M` points :math:`\in [-1, 1]^d`
    :type X:
        array_like

    :param Y:
        :math:`M'\times d` array of :math:`M'` points :math:`\in [-1, 1]^d`.
        When ``Y`` is ``None`` or is the very same object as ``X``, the
        polynomials are evaluated only once, at the points of ``X``.
    :type Y:
        array_like (default None)

    :param eval_pointwise:
        sets pointwise evaluation of the kernel, if ``True``, :math:`X` and :math:`Y` must have the same shape, see Returns
    :type eval_pointwise:
        bool (default False)

    :return:
        Kernel matrix if ``eval_pointwise=False`` (default), pointwise evaluation of :math:`K` otherwise, namely

        - if ``Y`` is ``None``

            - :math:`\left(K(x, y)\right)_{x\in X, y\in X}` if ``eval_pointwise=False``
            - :math:`\left(K(x, x)\right)_{x\in X}` if ``eval_pointwise=True``

        - otherwise

            - :math:`\left(K(x, y)\right)_{x\in X, y\in Y}` if ``eval_pointwise=False``
            - :math:`\left(K(x, y)\right)_{(x, y)\in (X, Y)}` if ``eval_pointwise=True`` (in this case X and Y should have the same shape)

    :rtype:
        array_like

    .. seealso::

        :py:meth:`eval_multiD_polynomials`
    """

    # The identity test must use the caller's objects: ``np.atleast_2d``
    # returns a new array for lists and 1D inputs, after which ``Y is X``
    # could never hold and the single-evaluation shortcut would be lost.
    same_points = Y is None or Y is X

    X = np.atleast_2d(X)

    if same_points:
        phi_X = self.eval_multiD_polynomials(X)
        if eval_pointwise:
            return inner1d(phi_X, phi_X, axis=1)
        return phi_X.dot(phi_X.T)

    # Evaluate the polynomials at the points of X and Y in a single call,
    # then split the rows back
    len_X = len(X)
    phi_XY = self.eval_multiD_polynomials(np.vstack((X, Y)))
    phi_X, phi_Y = phi_XY[:len_X], phi_XY[len_X:]

    if eval_pointwise:
        return inner1d(phi_X, phi_Y, axis=1)
    return phi_X.dot(phi_Y.T)