def cka_coefficient(
    X: np.ndarray,
    Y: np.ndarray,
    random_state: int = 123,
) -> Dict:
    """Simple function to calculate the CKA (centered kernel alignment) coefficient."""
    # estimate sigmas
    sigma_X = estimate_sigma(X, percent=50)
    sigma_Y = estimate_sigma(Y, percent=50)

    # calculate the kernel matrices
    X_gram = RBF(sigma_X)(X)
    Y_gram = RBF(sigma_Y)(Y)

    # center the kernels
    X_gram = KernelCenterer().fit_transform(X_gram)
    Y_gram = KernelCenterer().fit_transform(Y_gram)

    # normalizing coefficients (denominator)
    x_norm = np.linalg.norm(X_gram)
    y_norm = np.linalg.norm(Y_gram)

    # Frobenius inner product of the two kernels (numerator)
    xy_norm = np.sum(X_gram * Y_gram)

    # CKA coefficient
    cka_coeff = xy_norm / x_norm / y_norm

    return {
        "cka_coeff": cka_coeff,
        "cka_y_norm": y_norm,
        "cka_x_norm": x_norm,
        "cka_xy_norm": xy_norm,
    }
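# Usage sketch for the coefficient above (hypothetical data). `estimate_sigma`
# is not shown in this snippet, so the sketch below substitutes a
# median-heuristic length scale -- an assumption about what
# estimate_sigma(..., percent=50) returns.
import numpy as np
from scipy.spatial.distance import pdist
from sklearn.gaussian_process.kernels import RBF
from sklearn.preprocessing import KernelCenterer

rng = np.random.RandomState(123)
X = rng.randn(100, 3)
Y = X @ rng.randn(3, 2)  # Y is a deterministic function of X

sigma_X = np.median(pdist(X))  # median-heuristic stand-in for estimate_sigma
sigma_Y = np.median(pdist(Y))

K_x = KernelCenterer().fit_transform(RBF(sigma_X)(X))
K_y = KernelCenterer().fit_transform(RBF(sigma_Y)(Y))

cka = np.sum(K_x * K_y) / (np.linalg.norm(K_x) * np.linalg.norm(K_y))
print(cka)  # close to 1 for strongly dependent X and Y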
def __init__(self, degree=3, coef0=1, kernel_params=None, alpha=1.0,
             eigen_solver='auto', neigh=8, tol=0, max_iter=None,
             remove_zero_eig=True, n_components=2, random_state=None,
             n_jobs=None, coeficient=None, nkernel=10):
    self.kernel_params = kernel_params
    self.gamma = 0.0001
    self.neigh = neigh
    self.nkernel = nkernel
    self.n_components = n_components
    self.degree = degree
    self.coef0 = coef0
    self.alpha = alpha
    self.eigen_solver = eigen_solver
    self.remove_zero_eig = remove_zero_eig
    self.tol = tol
    self.max_iter = max_iter
    self.random_state = random_state
    self.n_jobs = n_jobs
    self._centerer = KernelCenterer()
    self.coeficient = coeficient
def __init__(self, n_components=None, kernel="linear",
             gamma=None, degree=3, coef0=1, kernel_params=None,
             alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',
             tol=0, max_iter=None, remove_zero_eig=False):
    if fit_inverse_transform and kernel == 'precomputed':
        raise ValueError(
            "Cannot fit_inverse_transform with a precomputed kernel.")
    self.n_components = n_components
    self.kernel = kernel
    self.kernel_params = kernel_params
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0
    self.alpha = alpha
    self.fit_inverse_transform = fit_inverse_transform
    self.eigen_solver = eigen_solver
    self.remove_zero_eig = remove_zero_eig
    self.tol = tol
    self.max_iter = max_iter
    self._centerer = KernelCenterer()
def kernel_alignment(K_x: np.ndarray, K_y: np.ndarray, center: bool = False) -> float:
    """Gives a target kernel alignment score: how aligned the kernels are.

    Very useful for measures which depend on comparing two different
    kernels, e.g. the Hilbert-Schmidt Independence Criterion (closely
    related to the Maximum Mean Discrepancy).

    Note: the centered target kernel alignment score is the same function
    with the center flag set to True.

    Parameters
    ----------
    K_x : np.ndarray, (n_samples, n_samples)
        The first kernel matrix, K(X, X')

    K_y : np.ndarray, (n_samples, n_samples)
        The second kernel matrix, K(Y, Y')

    center : bool, (default: False)
        The option to center the kernels (independently) beforehand.

    Returns
    -------
    kta_score : float
        The (centered) target kernel alignment score.
    """
    # center kernels
    if center:
        K_x = KernelCenterer().fit_transform(K_x)
        K_y = KernelCenterer().fit_transform(K_y)

    # target kernel alignment
    return np.sum(K_x * K_y) / np.linalg.norm(K_x) / np.linalg.norm(K_y)
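# A quick, self-contained check of the alignment score defined above
# (hypothetical data; RBF kernels from scikit-learn):
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.randn(50, 4)

K_x = rbf_kernel(X, gamma=0.5)
assert np.isclose(kernel_alignment(K_x, K_x), 1.0)  # a kernel aligns perfectly with itself

K_z = rbf_kernel(rng.randn(50, 4), gamma=0.5)  # kernel built on unrelated data
print(kernel_alignment(K_x, K_z, center=True))  # typically well below 1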
def KernelPCA(X):
    # pdist calculates the squared Euclidean distances for every pair of
    # points in the 100x2 dimensional dataset
    sq_dists = pdist(X, 'sqeuclidean')

    # variance of the Euclidean distance between all pairs of data points
    variance = np.var(sq_dists)

    # squareform converts the pairwise distances into a symmetric 100x100 matrix
    mat_sq_dists = squareform(sq_dists)

    # set the gamma parameter equal to the one used in scikit-learn's KernelPCA
    gamma = 15

    # compute the 100x100 kernel matrix
    K = np.exp(-gamma * mat_sq_dists)

    # center the kernel matrix
    kern_cent = KernelCenterer()
    K = kern_cent.fit_transform(K)

    # get eigenvalues in ascending order, with corresponding
    # eigenvectors, from the symmetric matrix
    eigvals, eigvecs = eigh(K)

    # get the eigenvector that corresponds to the highest eigenvalue
    X_pc1 = eigvecs[:, -1]

    return X_pc1
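# The comments above suggest a 100-sample, 2-feature dataset; a sketch of how
# this function might be called, assuming scikit-learn's make_moons:
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=100, random_state=123)
X_pc1 = KernelPCA(X)  # first kernel principal component, shape (100,)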
def __init__(self, kernel="linear", gamma=None, degree=3, coef0=1,
             kernel_params=None, alpha=1.0, fit_inverse_transform=False,
             eigen_solver='auto', tol=0, max_iter=None,
             remove_zero_eig=False, n_components=2, random_state=None,
             copy_X=True, n_jobs=None, coeficient=None, nkernel=10):
    self.kernel_params = kernel_params
    self.gamma = gamma
    self.nkernel = nkernel
    self.n_components = n_components
    self.degree = degree
    self.coef0 = coef0
    self.alpha = alpha
    self.fit_inverse_transform = fit_inverse_transform
    self.eigen_solver = eigen_solver
    self.remove_zero_eig = remove_zero_eig
    self.tol = tol
    self.max_iter = max_iter
    self.random_state = random_state
    self.n_jobs = n_jobs
    self.copy_X = copy_X
    self._centerer = KernelCenterer()
    self.coeficient = coeficient
def __init__(self, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3,
             kernel="linear", gamma=None, degree=3, coef0=1,
             norm_covariance=False, priors=None, print_timing=False):
    self.use_total_scatter = use_total_scatter
    self.sigma_sqrd = sigma_sqrd
    self.tol = tol
    self.kernel = kernel.lower()
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0
    self._centerer = KernelCenterer()
    self.norm_covariance = norm_covariance
    self.print_timing = print_timing
    self.priors = np.asarray(priors) if priors is not None else None

    if self.priors is not None:
        if (self.priors < 0).any():
            raise ValueError('priors must be non-negative')
        if self.priors.sum() != 1:
            print('warning: the priors do not sum to 1. Renormalizing')
            self.priors = self.priors / self.priors.sum()
def kernel_alignment(K1, K2, centered=False):
    """
    Calculate the (centered) kernel alignment score between the two kernels
    Kx and Ky:

        A(Kx, Ky) = <Kx, Ky>_F / (|Kx|_F * |Ky|_F)
                  = <Kx, Ky>_F / sqrt(<Kx, Kx>_F * <Ky, Ky>_F)

    with |A|_F = sqrt(<A, A>_F).

    :param K1: array-like, shape = (n_samples, n_samples)
    :param K2: array-like, shape = (n_samples, n_samples)
    :param centered: boolean, indicating whether the centered kernel
        alignment should be calculated.
    :return: scalar, (centered) kernel alignment score
    """
    if K1.shape != K2.shape:
        raise ValueError("Matrices must have same shape.")

    if centered:
        K1_c = KernelCenterer().fit_transform(K1)
        K2_c = KernelCenterer().fit_transform(K2)
    else:
        K1_c = K1
        K2_c = K2

    # calculate alignment
    fprod_12 = frobenius_product(K1_c, K2_c)
    fprod_11 = frobenius_product(K1_c)
    fprod_22 = frobenius_product(K2_c)

    return fprod_12 / np.sqrt(fprod_11 * fprod_22)
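# frobenius_product is not shown in this snippet; a minimal implementation
# consistent with how it is called above (a single argument means <A, A>_F):
import numpy as np

def frobenius_product(A, B=None):
    """Frobenius inner product <A, B>_F; <A, A>_F when B is omitted."""
    if B is None:
        B = A
    return np.sum(A * B)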
def chooseKernel(data, kerneltype='euclidean'):
    r"""Kernelize data (uses sklearn).

    Parameters
    ==========
    data : array of shape (n_individuals, n_dimensions)
        Data matrix.
    kerneltype : {'euclidean', 'cosine', 'laplacian', 'linear',
                  'polynomial_kernel', 'jaccard'}, optional
        Kernel type.

    Returns
    =======
    array of shape (n_individuals, n_individuals)
    """
    if kerneltype == 'euclidean':
        K = np.divide(1, (1 + pairwise_distances(data, metric="euclidean")))
    elif kerneltype == 'cosine':
        K = pairwise.cosine_similarity(data)
    elif kerneltype == 'laplacian':
        K = pairwise.laplacian_kernel(data)
    elif kerneltype == 'linear':
        K = pairwise.linear_kernel(data)
    elif kerneltype == 'polynomial_kernel':
        K = pairwise.polynomial_kernel(data)
    elif kerneltype == 'jaccard':
        K = 1 - distance.cdist(data, data, metric='jaccard')
    else:
        raise ValueError("unknown kerneltype: %s" % kerneltype)
    scaler = KernelCenterer().fit(K)
    return scaler.transform(K)
def fit(self, X, y=None):
    """Fit the model from data in X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    X = check_array(X, accept_sparse='csr', copy=self.copy_X)
    self._centerer = KernelCenterer()
    K = self._get_kernel(X)
    self.K = K
    self._fit_transform(K)

    if self.fit_inverse_transform:
        # no need to use the kernel to transform X; use the shortcut expression
        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)
        self._fit_inverse_transform(X_transformed, X)

    self.X_fit_ = X
    return self
def fit(self, X, Y, X_sparse=None, Kmm=None, Knm=None):
    X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)

    if X_sparse is None:
        fps_idxs, _ = self.FPS(X, self.n_active)
        self.X_sparse = X[fps_idxs, :]
    else:
        self.X_sparse = X_sparse

    if Kmm is None:
        Kmm = self._get_kernel(self.X_sparse, self.X_sparse)
        if self.center:
            Kmm = KernelCenterer().fit_transform(Kmm)
    self.Kmm = Kmm

    if Knm is None:
        Knm = self._get_kernel(X, self.X_sparse)
        Knm = KernelCenterer().fit_transform(Knm)
    self.Knm = Knm

    vmm, Umm = self._eig_solver(self.Kmm, k=self.n_active)
    vmm_inv = np.linalg.pinv(np.diagflat(vmm[:self.n_active - 1]))

    phi_active = self.Knm @ Umm[:, :self.n_active - 1] @ np.sqrt(vmm_inv)

    C = phi_active.T @ phi_active

    v_C, U_C = self._eig_solver(C, tol=0, k=self.n_active)
    U_C = U_C[:, v_C > 0]
    v_C = v_C[v_C > 0]
    v_C_inv = np.linalg.pinv(np.diagflat(v_C))

    Csqrt = U_C @ np.diagflat(np.sqrt(v_C)) @ U_C.T
    iCsqrt = U_C @ np.sqrt(v_C_inv) @ U_C.T

    C_pca = C

    C_lr = np.linalg.pinv(C + self.regularization * np.eye(C.shape[0]))
    C_lr = iCsqrt @ phi_active.T @ phi_active @ C_lr @ phi_active.T

    if len(Y.shape) == 1:
        C_lr = C_lr @ Y.reshape(-1, 1)
    else:
        C_lr = C_lr @ Y

    C_lr = C_lr @ C_lr.T

    Ct = self.mixing * C_pca + (1 - self.mixing) * C_lr

    v_Ct, U_Ct = self._eig_solver(Ct, tol=0, k=self.n_active)
    PPT = iCsqrt @ U_Ct[:, :self.n_components] @ np.diag(
        np.sqrt(v_Ct[:self.n_components]))

    PKT = Umm[:, :self.n_active - 1] @ np.sqrt(vmm_inv)
    self.pkt_ = PKT @ PPT

    T = self.Knm @ self.pkt_

    PT = np.linalg.pinv(T.T @ T) @ T.T
    self.pty_ = PT @ Y
    self.ptx_ = PT @ X
def __init__(self, kernel_type="linear", degree=2, gamma=None, coef0=1):
    self.kernel_type = kernel_type
    self.degree = degree
    self.gamma = gamma
    self.coef0 = coef0
    self.centerer = KernelCenterer()
def fit(self, X, Y):
    # check sizes of X, Y
    X = check_array(X, ensure_2d=True)
    Y = check_array(Y, ensure_2d=True)

    # check that the number of samples is the same
    assert (
        X.shape[0] == Y.shape[0]
    ), f"Samples of X ({X.shape[0]}) and Samples of Y ({Y.shape[0]}) are not the same"

    self.n_samples = X.shape[0]
    self.dx_dimensions = X.shape[1]
    self.dy_dimensions = Y.shape[1]

    # subsample data if necessary
    X = subset_indices(X, subsample=self.subsample, random_state=self.random_state)
    Y = subset_indices(Y, subsample=self.subsample, random_state=self.random_state)

    self.X_train_ = X
    self.Y_train_ = Y

    # calculate the kernel matrices
    K_x = self.compute_kernel(X, kernel=self.kernel_X,
                              gamma=self.gamma_X, params=self.kernel_params_X)
    K_y = self.compute_kernel(Y, kernel=self.kernel_Y,
                              gamma=self.gamma_Y, params=self.kernel_params_Y)

    # center the kernels, K_c = H K H with H = I - (1/n) 1 1^T
    if self.center:
        K_x = KernelCenterer().fit_transform(K_x)
        K_y = KernelCenterer().fit_transform(K_y)

    # compute the HSIC value
    self.hsic_value = np.sum(K_x * K_y)

    # calculate magnitudes
    self.K_x_norm = np.linalg.norm(K_x)
    self.K_y_norm = np.linalg.norm(K_y)

    return self
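# The fit above stores the unnormalized HSIC statistic np.sum(K_x * K_y).
# A self-contained sketch of the same quantity (the surrounding class and its
# compute_kernel helper are not shown, so RBF kernels are assumed here, and
# the conventional 1/(n-1)^2 scaling is added):
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.preprocessing import KernelCenterer

def hsic_rbf(X, Y, gamma_x=1.0, gamma_y=1.0):
    # HSIC as the Frobenius inner product of centered kernel matrices
    K_x = KernelCenterer().fit_transform(rbf_kernel(X, gamma=gamma_x))
    K_y = KernelCenterer().fit_transform(rbf_kernel(Y, gamma=gamma_y))
    return np.sum(K_x * K_y) / (X.shape[0] - 1) ** 2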
def fit(self, X, y=None):
    X = check_array(X, accept_sparse='csr', copy=self.copy_X)
    self._centerer = KernelCenterer()
    K = self._get_kernel(X)
    self._fit_transform(K)

    if self.fit_inverse_transform:
        # no need to use the kernel to transform X; use the shortcut expression
        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)
        self._fit_inverse_transform(X_transformed, X)

    self.X_fit_ = X
    return self
def _define_Kmm_Knm(self, X, Kmm=None, Knm=None):
    if Kmm is None or Knm is None:
        i_sparse, _ = self.FPS(X, self.n_active)
        self.X_sparse = X[i_sparse, :]
        Kmm = self._get_kernel(self.X_sparse, self.X_sparse)
        Knm = self._get_kernel(X, self.X_sparse)

        if self.center:
            Kmm = KernelCenterer().fit_transform(Kmm)
            Knm = KernelCenterer().fit_transform(Knm)

    self.Kmm = Kmm
    self.Knm = Knm
def test_kernelcenterer_vs_sklearn():
    # Compare msmbuilder.preprocessing.KernelCenterer
    # with sklearn.preprocessing.KernelCenterer
    kernelcentererr = KernelCentererR()
    kernelcentererr.fit(np.concatenate(trajs))

    kernelcenterer = KernelCenterer()
    kernelcenterer.fit(trajs)

    y_ref1 = kernelcentererr.transform(trajs[0])
    y1 = kernelcenterer.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
def chooseKernel(data, kerneltype='euclidean'):
    if kerneltype == 'euclidean':
        K = np.divide(1, (1 + pairwise_distances(data, metric="euclidean")))
    elif kerneltype == 'cosine':
        K = pairwise.cosine_similarity(data)
    elif kerneltype == 'laplacian':
        K = pairwise.laplacian_kernel(data)
    elif kerneltype == 'linear':
        K = pairwise.linear_kernel(data)
    elif kerneltype == 'polynomial_kernel':
        K = pairwise.polynomial_kernel(data)
    elif kerneltype == 'jaccard':
        K = 1 - distance.cdist(data, data, metric='jaccard')
    else:
        raise ValueError("unknown kerneltype: %s" % kerneltype)
    scaler = KernelCenterer().fit(K)
    return scaler.transform(K)
def __init__(self, sigma_sqrd=1e-8, tol=1.0e-3, kernel="linear",
             gamma=None, degree=3, coef0=1):
    self.sigma_sqrd = sigma_sqrd
    self.tol = tol
    self.kernel = kernel.lower()
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0
    self._centerer = KernelCenterer()
def fit(self, X, Y):
    # check sizes of X, Y
    X = check_array(X, ensure_2d=True)
    Y = check_array(Y, ensure_2d=True)

    # check that the number of features is the same (the cross-covariance
    # must be square for KernelCenterer to apply)
    assert (
        X.shape[1] == Y.shape[1]
    ), f"Features of X ({X.shape[1]}) and features of Y ({Y.shape[1]}) are not the same"

    self.n_samples = X.shape[0]
    self.dx_dimensions = X.shape[1]
    self.dy_dimensions = Y.shape[1]

    # subsample data if necessary
    if self.subsample is not None:
        X = self.rng.permutation(X)[:self.subsample, :]
        Y = self.rng.permutation(Y)[:self.subsample, :]

    self.X_train_ = X
    self.Y_train_ = Y

    # compute covariances
    C_xy = covariance(X, Y)
    C_xx = covariance(X)
    C_yy = covariance(Y)

    # center the covariance matrices, C_c = H C H with H = I - (1/n) 1 1^T
    if self.center:
        C_xy = KernelCenterer().fit_transform(C_xy)
        C_xx = KernelCenterer().fit_transform(C_xx)
        C_yy = KernelCenterer().fit_transform(C_yy)

    self.C_xy = C_xy
    self.C_xx = C_xx
    self.C_yy = C_yy

    # compute the covariance value (squared Frobenius norm)
    self.C_xy_norm = np.sum(C_xy ** 2)

    # compute the normalization
    self.C_xx_norm = np.linalg.norm(C_xx)
    self.C_yy_norm = np.linalg.norm(C_yy)

    return self
def __init__(self, C=1, relaxation="classic", coef0=1, degree=2,
             gamma=1.5, kernel_type=None):
    self.C = C
    self.relaxation = relaxation
    self.coef0 = coef0
    self.degree = degree
    self.gamma = gamma
    self.kernel_type = kernel_type
    self.centerer = KernelCenterer()
def make_group_Kprot(DB):
    import math
    from sklearn.preprocessing import KernelCenterer

    path = 'data/' + DB + '/'
    list_FASTA = pickle.load(open(path + DB + '_list_FASTA.data', 'rb'))
    nb_prot = len(list(list_FASTA))

    X = np.zeros((nb_prot, nb_prot))
    for i in range(nb_prot):
        # print(i)
        j = i
        for line in open(path + 'res/LA_' + str(i) + '.txt', 'r'):
            r = float(line.rstrip())
            X[i, j] = r
            X[j, i] = X[i, j]
            j += 1
        if j != nb_prot:
            print(i, 'not total')
            exit(1)

    X = KernelCenterer().fit_transform(X)

    K = np.zeros((nb_prot, nb_prot))
    for i in range(nb_prot):
        for j in range(i, nb_prot):
            K[i, j] = X[i, j] / math.sqrt(X[i, i] * X[j, j])
            K[j, i] = K[i, j]

    pickle.dump(K, open(path + DB + '_Kprot.data', 'wb'), protocol=2)
def center_and_normalise_kernel(K_temp):
    """Center and normalise the kernel matrix.

    Parameters
    ----------
    K_temp : numpy array of shape (nb_item, nb_item)
        Kernel matrix.

    Returns
    -------
    K_norm : numpy array of shape (nb_item, nb_item)
        Centered and normalised kernel matrix.
    """
    K_temp = KernelCenterer().fit_transform(K_temp)

    nb_item = K_temp.shape[0]
    K_norm = np.zeros((nb_item, nb_item))
    for i in range(nb_item):
        for j in range(i, nb_item):
            K_norm[i, j] = K_temp[i, j] / math.sqrt(K_temp[i, i] * K_temp[j, j])
            K_norm[j, i] = K_norm[i, j]

    return K_norm
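# After this transform the kernel has unit diagonal (cosine normalization);
# a quick check on a hypothetical linear kernel:
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(20, 5)
K_norm = center_and_normalise_kernel(X @ X.T)
assert np.allclose(np.diag(K_norm), 1.0)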
def KPCA(gamma, data, feature_size):
    sq_dists = squared_euclidean_distance(data)

    # squareform converts the pairwise distances into a symmetric 400x400 matrix
    mat_sq_dists = squareform(sq_dists)

    # compute the 400x400 kernel matrix
    K = rbfkernel(gamma, mat_sq_dists)

    # center the kernel matrix
    kern_cent = KernelCenterer()
    K = kern_cent.fit_transform(K)

    # get the eigenvectors with the largest eigenvalues
    eigen_values, eigen_vectors = eigh(K)
    indexes = eigen_values.argsort()[::-1]
    direction_vectors = eigen_vectors[:, indexes[0:feature_size]]

    projected_data = np.dot(K, direction_vectors)
    return projected_data
def __init__(self, C=1, relaxation="classic", coef0=1, degree=2,
             gamma=1.5, kernel_type=None):
    """Passive Aggressive Support Vector Machine for online learning."""
    self.C = C
    self.relaxation = relaxation
    self.coef0 = coef0
    self.degree = degree
    self.gamma = gamma
    self.kernel_type = kernel_type
    self.centerer = KernelCenterer()
def cv_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")

    for i in range(1, n_folds + 1):
        print("Test fold %d" % i)
        res_f = "../svm_result/weights/" + dataset + "_fold_%d_%s.weights" % (i, mkl)
        para_f = "../svm_result/upperbound/" + dataset + "_fold_%d_%s.ubound" % (i, mkl)

        # divide data
        test = np.array(tags == i)
        train = np.array(~test)
        train_y = labels[train, :]
        test_y = labels[test, :]
        n_train = len(train_y)
        n_test = len(test_y)

        # all train kernels are normalized and centered
        train_km_list = []
        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            # center the train kernel
            kc.fit(train_km)
            train_km_c = kc.transform(train_km)
            train_km_list.append(train_km_c)

        if mkl == 'UNIMKL':
            res = UNIMKL(train_km_list, train_y)
            np.savetxt(res_f, res)
        if mkl == 'ALIGNF2':
            res = alignf2(train_km_list, train_y, data)
            np.savetxt(res_f, res)
        if mkl.find('ALIGNF2SOFT') != -1:
            bestC, res = ALIGNF2SOFT(train_km_list, train_y, i, tags, data)
            np.savetxt(res_f, res)
            np.savetxt(para_f, bestC)
        if mkl == "TSMKL":
            W = np.zeros((n_km, n_labels))
            for j in range(n_labels):
                print("..label", j)
                W[:, j] = TSMKL(train_km_list, train_y[:, j])
            res_f = "../svm_result/weights/" + dataset + "_fold_%d_%s.weights" % (i, mkl)
            np.savetxt(res_f, W)
def reconstruction_errors(self):
    G = -0.5 * self.dist_matrix_ ** 2
    G_center = KernelCenterer().fit_transform(G)
    evals = self.kernel_pca_.lambdas_
    reconstruction_errors = []
    for n in np.arange(1, len(evals)):
        reconstruction_errors.append(
            np.sqrt(np.sum(G_center ** 2) - np.sum(evals[:n] ** 2)) / G.shape[0])
    return np.array(reconstruction_errors)
class KernelPca:
    # beta: Gaussian kernel parameter
    def __init__(self, beta):
        self.beta = beta
        self.centerer = KernelCenterer()

    # Gaussian kernel
    def __kernel(self, x1, x2):
        return np.exp(-self.beta * np.linalg.norm(x1 - x2) ** 2)

    # Compute the principal component vectors from the input data.
    # shape(X) = (N, M)
    # n: number of principal components to extract
    def fit_transform(self, X, n):
        self.X = X
        # Gram matrix
        N = X.shape[0]
        K = np.array([[self.__kernel(X[i], X[j]) for j in range(N)]
                      for i in range(N)])
        # centering
        K = self.centerer.fit_transform(K)
        # eigh returns eigenvalues in ascending order
        vals, vecs = np.linalg.eigh(K)
        vals = vals[::-1]
        vecs = vecs[:, ::-1]
        # top n singular values and left singular vectors
        self.sigma = np.sqrt(vals[:n])   # (n)
        self.a = np.array(vecs[:, :n])   # (N, n)
        return self.sigma * self.a       # (N, n)

    # Return the principal-component representation of x.
    # shape(x) = (Nx, M)
    def transform(self, x):
        # Gram matrix between x and the training data
        N = self.X.shape[0]
        Nx = x.shape[0]
        K = np.array([[self.__kernel(x[i], self.X[j]) for j in range(N)]
                      for i in range(Nx)])   # (Nx, N)
        # centering
        K = self.centerer.transform(K)
        # project onto the principal components
        return K.dot(self.a) / self.sigma    # (Nx, n)
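# Usage sketch for the class above (hypothetical data):
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(30, 4)

kpca = KernelPca(beta=0.1)
Z = kpca.fit_transform(X, n=2)           # (30, 2) component scores
Z_new = kpca.transform(rng.randn(5, 4))  # project new points onto the same components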
def test_center_kernel():
    """Test that KernelCenterer is equivalent to Scaler in feature space."""
    X_fit = np.random.random((5, 4))

    scaler = Scaler(with_std=False)
    scaler.fit(X_fit)
    X_fit_centered = scaler.transform(X_fit)
    K_fit = np.dot(X_fit, X_fit.T)

    # center fit-time matrix
    centerer = KernelCenterer()
    K_fit_centered = np.dot(X_fit_centered, X_fit_centered.T)
    K_fit_centered2 = centerer.fit_transform(K_fit)
    assert_array_almost_equal(K_fit_centered, K_fit_centered2)

    # center predict-time matrix
    X_pred = np.random.random((2, 4))
    K_pred = np.dot(X_pred, X_fit.T)
    X_pred_centered = scaler.transform(X_pred)
    K_pred_centered = np.dot(X_pred_centered, X_fit_centered.T)
    K_pred_centered2 = centerer.transform(K_pred)
    assert_array_almost_equal(K_pred_centered, K_pred_centered2)
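# The equivalence tested above rests on the identity Kc = H K H with
# H = I - (1/n) 11^T; a direct numpy check of what KernelCenterer computes
# at fit time:
import numpy as np
from sklearn.preprocessing import KernelCenterer

rng = np.random.RandomState(1)
X = rng.randn(6, 3)
K = X @ X.T

n = K.shape[0]
H = np.eye(n) - np.ones((n, n)) / n  # centering matrix
assert np.allclose(H @ K @ H, KernelCenterer().fit_transform(K))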
def center_normTrace_decomp(K):
    print('centering kernel')
    # Get transformed features for K_train that DON'T snoop when centering, tracing, or eiging
    Kcent = KernelCenterer()
    Ktrain = Kcent.fit_transform(K[:in_samples, :in_samples])
    # Ktrain = Ktrain / float(np.trace(Ktrain))
    # [EigVals, EigVectors] = scipy.sparse.linalg.eigsh(Ktrain, k=reduced_dimen, which='LM')
    [EigVals, EigVectors] = scipy.linalg.eigh(
        Ktrain, eigvals=(in_samples - reduced_dimen, in_samples - 1))
    for i in range(len(EigVals)):
        if EigVals[i] <= 0:
            EigVals[i] = 0
    EigVals = np.flipud(np.fliplr(np.diag(EigVals)))
    EigVectors = np.fliplr(EigVectors)
    Ktrain_decomp = np.dot(EigVectors, scipy.linalg.sqrtm(EigVals))

    # Get transformed features for K_test using the K_train implied mapping
    Kcent = KernelCenterer()
    Kfull = Kcent.fit_transform(K)
    # Kfull = Kfull / float(np.trace(Kfull))
    K_train_test = Kfull[in_samples:, :in_samples]
    Ktest_decomp = np.dot(K_train_test, np.linalg.pinv(Ktrain_decomp.T))

    # Combine mapped train and test vectors and normalize each vector
    Kdecomp = np.vstack((Ktrain_decomp, Ktest_decomp))
    print('doing normalization')
    Kdecomp = normalize(Kdecomp, copy=False)
    return Kdecomp
def predict(self, X=None, Knm=None):
    if X is None and Knm is None:
        raise Exception("Error: a feature matrix X or a kernel matrix Knm is required")

    if self.pky_ is None:
        raise Exception("Error: must fit the KRR model before transforming")
    else:
        if Knm is None:
            Knm = self._get_kernel(X, self.X_sparse)
            Knm = KernelCenterer().fit_transform(Knm)

        Yp = Knm @ self.pky_

        return Yp
def __init__(self, n_components, kernel='linear', eigen_solver='auto',
             max_iterations=None, gamma=0, degree=3, coef0=1, alpha=1.0,
             tolerance=0, fit_inverse_transform=False):
    self._n_components = n_components
    self._gamma = gamma
    self._tolerance = tolerance
    self._fit_inverse_transform = fit_inverse_transform
    self._max_iterations = max_iterations
    self._degree = degree
    self._kernel = kernel
    self._eigen_solver = eigen_solver
    self._coef0 = coef0
    self._centerer = KernelCenterer()
    self._alpha = alpha
def __init__(self, n_components=None, kernel="linear", gamma=None, degree=3,
             coef0=1, kernel_params=None, eigen_solver='auto', tol=0,
             max_iter=None, random_state=None, center=False):
    self.n_components = n_components
    self._kernel = kernel
    self.kernel_params = kernel_params
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0
    self.eigen_solver = eigen_solver
    self.tol = tol
    self.max_iter = max_iter
    self.random_state = random_state
    self._centerer = KernelCenterer()
    self.center = center
class KernelECA(BaseEstimator, TransformerMixin):
    """Kernel Entropy Component Analysis (KECA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components : int or None
        Number of components. If None, all non-zero components are kept.

    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for a kernel passed as a
        callable object. Ignored by other kernels.

    eigen_solver : string ['auto'|'dense'|'arpack']
        Select eigensolver to use. If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol : float
        Convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)

    max_iter : int
        Maximum number of iterations for arpack.
        Default: None (optimal value will be chosen by arpack)

    random_state : int seed, RandomState instance, or None, default : None
        A pseudo random number generator used for the initialization of the
        residuals when eigen_solver == 'arpack'.

    Attributes
    ----------
    lambdas_ :
        Eigenvalues of the centered kernel matrix

    alphas_ :
        Eigenvectors of the centered kernel matrix

    dual_coef_ :
        Inverse transform matrix

    X_transformed_fit_ :
        Projection of the fitted data on the kernel entropy components

    References
    ----------
    Kernel ECA based on:
    (c) Robert Jenssen, University of Tromso, Norway, 2010
    R. Jenssen, "Kernel Entropy Component Analysis,"
    IEEE Trans. Patt. Anal. Mach. Intel., 32(5), 847-860, 2010.
    """

    def __init__(self, n_components=None, kernel="linear", gamma=None,
                 degree=3, coef0=1, kernel_params=None, eigen_solver='auto',
                 tol=0, max_iter=None, random_state=None, center=False):
        self.n_components = n_components
        self._kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self._centerer = KernelCenterer()
        self.center = center

    @property
    def _pairwise(self):
        return self._kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self._kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self._kernel,
                                filter_params=True, **params)

    def _fit_transform(self, K):
        """Fit using the kernel K."""
        # center the kernel
        if self.center:
            K = self._centerer.fit_transform(K)

        X_transformed = self.kernelECA(K=K)
        self.X_transformed = X_transformed
        return K

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)
        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)
        X_transformed = self.X_transformed
        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        raise NotImplementedError("Function inverse_transform is not implemented.")

    # helper functions => to integrate in the code!
    def kernelECA(self, K):
        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        self.lambdas_, self.alphas_ = linalg.eigh(K)
        d = self.lambdas_
        E = self.alphas_

        # sort eigenvectors in descending order
        D, E = self.sort_eigenvalues(d, E)
        d = np.diag(D)

        sorted_entropy_index, entropy = self.ECA(D, E)
        Es = E[:, sorted_entropy_index]
        ds = d[sorted_entropy_index]

        Phi = np.zeros((K.shape[0], n_components))
        for i in range(n_components):
            Phi[:, i] = np.sqrt(ds[i]) * Es[:, i]

        X_transformed = Phi
        return X_transformed

    def sort_eigenvalues(self, D, E):
        d = D
        indices = np.argsort(d)[::-1]
        d = d[indices]
        D = np.zeros((len(d), len(d)))
        for i in range(len(d)):
            D[i, i] = d[i]
        E = E[:, indices]
        return D, E

    def ECA(self, D, E):
        N = E.shape[0]
        entropy = np.multiply(np.diag(D).T, (np.dot(np.ones((1, N)), E)) ** 2)[0]
        indices = np.argsort(entropy)[::-1]
        entropy = entropy[indices]
        return indices, entropy
def fit(self, X, Y):
    """Fit the KCCA model with two views represented by kernels X and Y.

    Parameters
    ----------
    X : array_like, shape = (n_samples, n_features) for data matrix,
        or shape = (n_samples, n_samples) for kernel matrix. When both X
        and Y are kernel matrices, the kernel parameter should be set to
        'precomputed'. It is considered to be one view of the data.

    Y : array_like, shape = (n_samples, n_features) for data matrix,
        or shape = (n_samples, n_samples) for kernel matrix. When both X
        and Y are kernel matrices, the kernel parameter should be set to
        'precomputed'. It is considered to be another view of the data.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    check_consistent_length(X, Y)
    X = check_array(X, dtype=float, copy=self.copy)
    Y = check_array(Y, dtype=float, copy=self.copy, ensure_2d=False)
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    n = X.shape[0]
    p = X.shape[1]
    q = Y.shape[1]

    if self.n_components < 1 or self.n_components > n:
        raise ValueError('Invalid number of components: %d' % self.n_components)
    if self.eigen_solver not in ("auto", "dense", "arpack"):
        raise ValueError("Got eigen_solver %s when only 'auto', "
                         "'dense' and 'arpack' are valid" % self.eigen_solver)
    if self.kernel == 'precomputed' and (p != n or q != n):
        raise ValueError('Invalid kernel matrices dimension')
    if not self.pgso and (self.kapa <= 0 or self.kapa >= 1):
        raise ValueError('kapa should be in (0, 1) when pgso=False')
    if self.pgso and (self.kapa < 0 or self.kapa > 1):
        raise ValueError('kapa should be in [0, 1] when pgso=True')

    KX = self._get_kernel(X)
    KY = self._get_kernel(Y)

    if self.center:
        kc = KernelCenterer()
        self.KXc_ = kc.fit_transform(KX)
        self.KYc_ = kc.fit_transform(KY)
    else:
        self.KXc_ = KX
        self.KYc_ = KY

    if self.pgso:  # use PGSO to decompose the kernel matrices
        self._fit_pgso(self.KXc_, self.KYc_)
    else:
        self._fit(self.KXc_, self.KYc_)
    return self
def ALIGNFSOFT(kernel_list, ky, y, test_fold, tags):
    # Find the best upper bound in CV, then train on the whole data.
    # Return the weights.
    y = y.ravel()
    n_km = len(kernel_list)
    tag = np.array(tags)
    tag = tag[tag != test_fold]
    remain_fold = np.unique(tag).tolist()

    all_best_c = []
    for validate_fold in remain_fold:
        train = tag != validate_fold
        validate = tag == validate_fold
        # Train on the train folds, validate on the validate fold.
        # Do not use the test fold; it is used in the outer CV.
        ky_train = ky[np.ix_(train, train)]
        y_train = y[train]
        y_validate = y[validate]
        train_km_list = []
        validate_km_list = []
        n_train = len(y_train)
        n_validate = len(y_validate)

        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            validate_km = km[np.ix_(validate, train)]
            # center train and validate kernels
            train_km_c = kc.fit_transform(train_km)
            train_km_list.append(train_km_c)
            validate_km_c = kc.transform(validate_km)
            validate_km_list.append(validate_km_c)

        # If the label is too biased, SVM CV will fail; just return the ALIGNF solution.
        if np.sum(y_train == 1) > n_train - 3 or np.sum(y_train == -1) > n_train - 3:
            return 1e8, ALIGNFSLACK(train_km_list, ky_train, 1e8)

        Cs = np.exp2(np.array(range(-9, 7))).tolist() + [1e8]
        W = np.zeros((n_km, len(Cs)))
        for i in range(len(Cs)):
            W[:, i] = ALIGNFSLACK(train_km_list, ky_train, Cs[i])
        W = W / np.linalg.norm(W, 2, 0)

        f1 = np.zeros(len(Cs))
        for i in range(len(Cs)):
            train_ckm = np.zeros((n_train, n_train))
            validate_ckm = np.zeros((n_validate, n_train))
            w = W[:, i]
            for j in range(n_km):
                train_ckm += w[j] * train_km_list[j]
                validate_ckm += w[j] * validate_km_list[j]
            f1[i] = svm(train_ckm, validate_ckm, y_train, y_validate)

        # take the first maximum
        maxind = np.argmax(f1)
        bestC = Cs[maxind]
        all_best_c.append(bestC)
        print(f1)
        print("..Best C is", bestC)

    bestC = np.mean(all_best_c)
    print("..Take the average best upper bound", bestC)
    # use the best upper bound to solve ALIGNFSOFT
    return bestC, ALIGNFSLACK(kernel_list, ky, bestC)
class KernelPCA(BaseEstimator, TransformerMixin):
    """Kernel Principal Component Analysis (KPCA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components : int or None
        Number of components. If None, all non-zero components are kept.

    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for a kernel passed as a
        callable object. Ignored by other kernels.

    alpha : float
        Hyperparameter of the ridge regression that learns the inverse
        transform (when fit_inverse_transform=True).
        Default: 1.0

    fit_inverse_transform : bool
        Learn the inverse transform for non-precomputed kernels
        (i.e. learn to find the pre-image of a point).
        Default: False

    eigen_solver : string ['auto'|'dense'|'arpack']
        Select eigensolver to use. If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol : float
        Convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)

    max_iter : int
        Maximum number of iterations for arpack.
        Default: None (optimal value will be chosen by arpack)

    remove_zero_eig : boolean, default=False
        If True, then all components with zero eigenvalues are removed, so
        that the number of components in the output may be < n_components
        (and sometimes even zero due to numerical instability).
        When n_components is None, this parameter is ignored and components
        with zero eigenvalues are removed regardless.

    Attributes
    ----------
    lambdas_ :
        Eigenvalues of the centered kernel matrix

    alphas_ :
        Eigenvectors of the centered kernel matrix

    evals_ : array[float], shape=(n_samples,)
        All eigenvalues of the centered kernel matrix

    evecs_ : array[float, float], shape=(n_samples, n_samples)
        All eigenvectors of the centered kernel matrix

    dual_coef_ :
        Inverse transform matrix

    X_transformed_fit_ :
        Projection of the fitted data on the kernel principal components

    References
    ----------
    Kernel PCA was introduced in:
        Bernhard Schoelkopf, Alexander J. Smola, and Klaus-Robert Mueller.
        1999. Kernel principal component analysis. In Advances in kernel
        methods, MIT Press, Cambridge, MA, USA 327-352.
    """

    def __init__(self, n_components=None, kernel="linear",
                 gamma=None, degree=3, coef0=1, kernel_params=None,
                 alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',
                 tol=0, max_iter=None, remove_zero_eig=False):
        if fit_inverse_transform and kernel == 'precomputed':
            raise ValueError(
                "Cannot fit_inverse_transform with a precomputed kernel.")
        self.n_components = n_components
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self._centerer = KernelCenterer()

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def _fit_transform(self, K):
        """Fit using the kernel K."""
        # center the kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
            self.evals_, self.evecs_ = linalg.eigh(K)
        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K, n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def _fit_inverse_transform(self, X_transformed, X):
        if hasattr(X, "tocsr"):
            raise NotImplementedError("Inverse transform not implemented for "
                                      "sparse matrices!")
        n_samples = X_transformed.shape[0]
        K = self._get_kernel(X_transformed)
        K.flat[::n_samples + 1] += self.alpha
        self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
        self.X_transformed_fit_ = X_transformed

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)

        if self.fit_inverse_transform:
            sqrt_lambdas = np.diag(np.sqrt(self.lambdas_))
            X_transformed = np.dot(self.alphas_, sqrt_lambdas)
            self._fit_inverse_transform(X_transformed, X)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)

        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        if self.fit_inverse_transform:
            self._fit_inverse_transform(X_transformed, X)

        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        """Transform X back to original space.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_components)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_features)

        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

        K = self._get_kernel(X, self.X_transformed_fit_)
        return np.dot(K, self.dual_coef_)
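# Usage sketch for the class above (it mirrors the scikit-learn KernelPCA API,
# so a toy two-moons projection with an RBF kernel illustrates it):
from sklearn.datasets import make_moons

X, _ = make_moons(n_samples=100, random_state=0)
kpca = KernelPCA(n_components=2, kernel="rbf", gamma=15,
                 fit_inverse_transform=True)
X_kpca = kpca.fit_transform(X)           # (100, 2) projection
X_back = kpca.inverse_transform(X_kpca)  # approximate pre-images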
def ovkr_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")
    # tags = np.array(range(n_sample)) % n_folds + 1
    # np.random.seed(1234)
    # np.random.shuffle(tags)

    pred = np.zeros((n_sample, n_labels))
    # run for each fold
    for i in range(1, n_folds + 1):
        print("Test fold %d" % i)
        res_f = "../ovkr_result/weights/" + dataset + "_fold_%d_%s.weights" % (i, mkl)

        # divide data
        test = np.array(tags == i)
        train = np.array(~test)
        train_y = labels[train, :]
        test_y = labels[test, :]
        n_train = len(train_y)
        n_test = len(test_y)

        train_km_list = []
        test_km_list = []
        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            test_km = km[np.ix_(test, train)]
            # center train and test kernels
            kc.fit(train_km)
            train_km_c = kc.transform(train_km)
            test_km_c = kc.transform(test_km)
            train_km_list.append(train_km_c)
            test_km_list.append(test_km_c)

        if mkl == 'UNIMKL':
            wei = UNIMKL(n_km, n_labels)
        else:
            wei = np.loadtxt(res_f, ndmin=2)

        normw = np.linalg.norm(wei)
        uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
        if normw == 0:
            wei[:, 0] = uni
        else:
            wei[:, 0] = wei[:, 0] / normw

        train_ckm = np.zeros((n_train, n_train))
        for t in range(n_km):
            train_ckm += wei[t, 0] * train_km_list[t]

        # combine the test kernels using the learned weights
        test_ckm = np.zeros(test_km_list[0].shape)
        for t in range(n_km):
            test_ckm = test_ckm + wei[t, 0] * test_km_list[t]

        AP = OVKR_train_CV(train_ckm, train_y, tags[train])
        pred_label = OVKR_test(test_ckm, AP)
        pred[test, :] = pred_label
    return pred
def ovkr_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")

    # add noise to the output
    noise_level = [0.005, 0.010, 0.015, 0.020, 0.025]
    for nid in range(len(noise_level)):
        noi = noise_level[nid]
        print("noise", noi, nid)
        Y = addNoise(labels, noi)

        pred = np.zeros((n_sample, n_labels))
        pred_bin = np.zeros((n_sample, n_labels))
        # run for each fold
        for i in range(1, n_folds + 1):
            print("Test fold %d" % i)
            res_f = "../ovkr_result/noisy_weights/" + dataset + \
                "_fold_%d_%s_noise_%d.weights" % (i, mkl, nid)

            # divide data
            test = np.array(tags == i)
            train = np.array(~test)
            train_y = Y[train, :]
            test_y = Y[test, :]
            n_train = len(train_y)
            n_test = len(test_y)

            train_km_list = []
            test_km_list = []
            for km in kernel_list:
                kc = KernelCenterer()
                train_km = km[np.ix_(train, train)]
                test_km = km[np.ix_(test, train)]
                # center train and test kernels
                kc.fit(train_km)
                train_km_c = kc.transform(train_km)
                test_km_c = kc.transform(test_km)
                train_km_list.append(train_km_c)
                test_km_list.append(test_km_c)

            if mkl == 'UNIMKL':
                wei = UNIMKL(n_km, n_labels)
            else:
                wei = np.loadtxt(res_f, ndmin=2)

            normw = np.linalg.norm(wei)
            uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
            if normw == 0:
                wei[:, 0] = uni
            else:
                wei[:, 0] = wei[:, 0] / normw

            train_ckm = np.zeros((n_train, n_train))
            for t in range(n_km):
                train_ckm += wei[t, 0] * train_km_list[t]

            # combine the test kernels using the learned weights
            test_ckm = np.zeros(test_km_list[0].shape)
            for t in range(n_km):
                test_ckm = test_ckm + wei[t, 0] * test_km_list[t]

            AP = OVKR_train_CV(train_ckm, train_y, tags[train])
            pred_label = OVKR_test(test_ckm, AP)
            pred[test, :] = pred_label

        pred_real_f = "../ovkr_result/noisy_pred/%s_cvpred_%s_real_noise_%d.npy" % (data, mkl, nid)
        np.save(pred_real_f, pred)
plt.figure()
plt.plot(nComponents, kpcaldaScores, lw=3)
plt.xlim(1, np.amax(nComponents))
plt.title('kPCA accuracy')
plt.xlabel('Number of components')
plt.ylabel('accuracy')
plt.xlim([500, 1500])
plt.legend(['LDA'], loc='lower right')
plt.grid(True)

if 0:  # K-PCA second round
    ktrain = pair.rbf_kernel(Xtrain, Xtrain, gamma)
    ktest = pair.rbf_kernel(Xtest, Xtrain, gamma)
    kcent = KernelCenterer()
    kcent.fit(ktrain)
    ktrain = kcent.transform(ktrain)
    ktest = kcent.transform(ktest)

    kpca = PCA()
    kpca.fit_transform(ktrain)
    cumvarkPCA2 = np.cumsum(kpca.explained_variance_ratio_[0:220])

    # calculate classification scores for each component
    nComponents = np.arange(1, nFeatures)
    kpcaScores2 = np.zeros((5, len(nComponents)))
    for i, n in enumerate(nComponents):
        kpca2 = PCA(n_components=n)
        kpca2.fit(ktrain)
        XtrainT = kpca2.transform(ktrain)
def svm_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")

    pred = np.zeros((n_sample, n_labels))
    # run for each fold
    for i in range(1, n_folds + 1):
        print("Test fold %d" % i)
        res_f = "../svm_result/weights/" + dataset + "_fold_%d_%s.weights" % (i, mkl)

        # divide data
        test = np.array(tags == (i + 1 if i + 1 < 6 else 1))
        train = np.array(~test)
        train_y = labels[train, :]
        test_y = labels[test, :]
        n_train = len(train_y)
        n_test = len(test_y)

        train_km_list = []
        test_km_list = []
        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            test_km = km[np.ix_(test, train)]
            # center train and test kernels
            kc.fit(train_km)
            train_km_c = kc.transform(train_km)
            test_km_c = kc.transform(test_km)
            train_km_list.append(train_km_c)
            test_km_list.append(test_km_c)

        if mkl == 'UNIMKL':
            wei = UNIMKL(n_km, n_labels)
        else:
            wei = np.loadtxt(res_f, ndmin=2)

        # normalize the weights
        normw = np.linalg.norm(wei, 2, 0)
        uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
        for t in range(n_labels):
            if normw[t] == 0:  # collapsed solution
                wei[:, t] = uni
            else:
                wei[:, t] = wei[:, t] / normw[t]

        for j in range(n_labels):
            tr_y = train_y[:, j]
            te_y = test_y[:, j]
            if wei.shape[1] == 1:
                wj = wei[:, 0]
            else:
                wj = wei[:, j]

            ckm = np.zeros((n_train, n_train))
            for t in range(n_km):
                ckm = ckm + wj[t] * train_km_list[t]
            train_ckm = ckm

            # combine the test kernels using the learned weights
            test_ckm = np.zeros(test_km_list[0].shape)
            for t in range(n_km):
                test_ckm = test_ckm + wj[t] * test_km_list[t]

            pred_label = svm(train_ckm, test_ckm, tr_y, te_y, tags[train], i)
            pred[test, j] = pred_label
    return pred
    return K


if __name__ == "__main__":
    classes = generate_spike_classes(1, 2)
    train = generate_spike_times(classes)
    test = generate_spike_times(classes)
    rasterPlot(train)
    K = compute_K_matrix(train)

    # Center the kernel matrix. KernelCenterer computes Kc = H K H, the same
    # as the commented-out lines below:
    # N = K.shape[0]
    # H = np.eye(N) - np.tile(1. / N, [N, N])
    # Kc = np.dot(np.dot(H, K), H)
    kcenterer = KernelCenterer()
    kcenterer.fit(K)
    Kc = kcenterer.transform(K)

    D, E = eig(Kc)
    proj = np.dot(Kc, E[:, 0:2])

    # center the test kernel
    Kt = compute_K_matrix(train, test)
    # M = Kt.shape[0]
    # A = np.tile(K.sum(axis=0), [M, 1]) / N
    # B = np.tile(Kt.sum(axis=1), [N, 1]) / N
    # Kc2 = Kt - A - B + K.sum() / N**2
    Kc2 = kcenterer.transform(Kt)
    proj2 = np.dot(Kc2, E[:, 0:2])
class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Kernelized Fisher Discriminant Analysis (KDA)

    A classifier with a non-linear decision boundary, generated by fitting
    class conditional densities to the data, using the Fisher criterion of
    maximizing between-class variance while minimizing within-class
    variance.

    The Fisher criterion is applied in a non-linear space by transforming
    the data, X, of dimension D onto a D-dimensional manifold of a
    D'-dimensional space (where D' is possibly infinite) using a function
    f(X). The key to solving the problem in the non-linear space is to
    write the Fisher solution only in terms of inner products of the
    vectors X*Y. Then the kernel trick can be employed, such that the
    standard inner product is promoted to a general inner product. That is,
    K(X,Y) = X*Y --> K(X,Y) = f(X)*f(Y), which is allowed for valid
    kernels. In this case, the function f() does not need to be known,
    only the kernel K(X,Y).

    The fitted model can also be used to reduce the dimensionality of the
    input, by projecting it onto the most discriminative directions.

    Parameters
    ----------
    use_total_scatter : boolean
        If True then use the total scatter matrix
        St = Sum_i (x_i - m)(x_i - m).T instead of Sw.
        If False, use
        Sw = Sum_{c=1..n_classes} Sum_{i; x in class c} norm_c (x_i - m_c)(x_i - m_c).T,
        where norm_c = 1/N_samples_class_c if norm_covariance=True,
        else norm_c = 1.

    sigma_sqrd : float
        Smooth regularization parameter: the size of the singular value
        where smoothing becomes important. NOTE: it is a fraction in case
        norm_covariance=False, as a priori the scale of the singular
        values is not known in this case.

    tol : float
        Used for the truncated SVD of St; essentially a form of
        regularization. The tol for SVD(R) is 1e-6, fixed right now.

    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel used for the generalized inner product.
        Default: "linear"

    degree : int, optional
        Degree for poly.
        Default: 3.

    gamma : float, optional
        Kernel coefficient for rbf, sigmoid and poly kernels.
        Default: 1/n_features.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.

    norm_covariance : boolean
        If True, the covariance of each class will be divided by
        (n_points_in_class - 1). NOTE: not currently used.

    priors : array, optional, shape = [n_classes]
        Priors on classes.

    print_timing : boolean
        Print the time taken by several matrix operations in the algorithm.

    Attributes
    ----------
    `means_` : array-like, shape = [n_components_found_, [n_classes, n_features]]
        Class means, for each component found.

    `priors_` : array-like, shape = [n_classes]
        Class priors (sum to 1).

    `n_components_found_` : int
        Number of Fisher components found, which is <= n_components.

    Examples (put fisher.py in working directory)
    --------
    >>> import numpy as np
    >>> from fisher import KernelFisher
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([0, 0, 0, 1, 1, 1])
    >>> fd = KernelFisher()
    >>> fd.fit(X, y)
    KernelFisher(coef0=1, degree=3, gamma=None, kernel='linear',
           norm_covariance=False, print_timing=False, priors=None,
           sigma_sqrd=1e-08, tol=0.001, use_total_scatter=True)
    >>> print(fd.transform([[-0.8, -1]]))
    [[-7.62102356]]
    """

    def __init__(self, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3,
                 kernel="linear", gamma=None, degree=3, coef0=1,
                 norm_covariance=False, priors=None, print_timing=False):
        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self._centerer = KernelCenterer()
        self.norm_covariance = norm_covariance
        self.print_timing = print_timing
        self.priors = np.asarray(priors) if priors is not None else None

        if self.priors is not None:
            if (self.priors < 0).any():
                raise ValueError('priors must be non-negative')
            if self.priors.sum() != 1:
                print('warning: the priors do not sum to 1. Renormalizing')
                self.priors = self.priors / self.priors.sum()

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        params = {"gamma": self.gamma,
                  "degree": self.degree,
                  "coef0": self.coef0}
        try:
            return pairwise_kernels(X, Y, metric=self.kernel,
                                    filter_params=True, **params)
        except AttributeError:
            raise ValueError("%s is not a valid kernel. Valid kernels are: "
                             "rbf, poly, sigmoid, linear and precomputed."
                             % self.kernel)

    def fit(self, X, y):
        """Fit the Kernelized Fisher Discriminant model according to the
        given training data and parameters.

        Based on "Algorithm 5" in
        Zhang, et. al. 'Regularized Discriminant Analysis, Ridge Regression
        and Beyond', Journal of Machine Learning Research 11 (2010) 2199-2228.

        NOTE: setting norm_covariance=False and use_total_scatter=True, and
        solution_norm = 'A' or 'B', will give the algorithm from the paper.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array, shape = [n_samples]
            Target values (integers)
        """
        X, y = check_arrays(X, y, sparse_format='dense')
        self.classes_, y = unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        n_samples_perclass = np.bincount(y)

        if n_classes < 2:
            raise ValueError('y has less than 2 classes')
        if self.priors is None:
            self.priors_ = np.bincount(y) / float(n_samples)
        else:
            self.priors_ = self.priors

        ts = time.time()
        self.means_ = []
        for ind in range(n_classes):
            Xg = X[y == ind, :]
            meang = Xg.mean(0)
            self.means_.append(np.asarray(meang))
        if self.print_timing:
            print('KernelFisher.fit: means took', time.time() - ts)

        ts = time.time()
        PI_diag = np.diag(1.0 * n_samples_perclass)            # (n_classes, n_classes)
        PI_inv = np.diag(1.0 / (1.0 * n_samples_perclass))     # (n_classes, n_classes)
        PI_sqrt_inv = np.sqrt(PI_inv)                          # (n_classes, n_classes)
        # H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples, n_samples))
        E = np.zeros((n_samples, n_classes))                   # (n_samples, n_classes)
        E[np.arange(n_samples), y] = 1
        E_PIsi = np.dot(E, PI_sqrt_inv)
        One_minus_E_Pi_Et = np.identity(n_samples) - np.inner(E, np.inner(PI_diag, E).T)  # (n_samples, n_samples)
        if self.print_timing:
            print('KernelFisher.fit: matrices took', time.time() - ts)

        #############################################################
        # C = HKH = (I - 1/n 1x1.T) K (I - 1/n 1x1.T)
        #   = (K - 1xK_mean.T) * (I - 1/n 1x1.T)
        #   = K - K_mean x 1.T - 1 x K_mean.T + K_allmean 1x1
        # --> which is the same as what self._centerer.fit_transform(C) performs
        #
        # If use_total_scatter=False, then use Sw, which is
        # (1 - E*Pi*E.T) K (1 - E*Pi*E.T)
        #############################################################
        ts = time.time()
        C = self._get_kernel(X)
        K_mean = np.sum(C, axis=1) / (1.0 * C.shape[1])
        if self.use_total_scatter:
            C = self._centerer.fit_transform(C)
        else:
            C = np.inner(One_minus_E_Pi_Et, np.inner(C, One_minus_E_Pi_Et).T)
        if self.print_timing:
            print('KernelFisher.fit: Kernel Calculation took', time.time() - ts)

        ts = time.time()
        Uc, Sc, Utc, Sc_norm = self.condensed_svd(C, self.tol, store_singular_vals=True)
        if self.print_timing:
            print('KernelFisher.fit: Uc, Sc, Utc took', time.time() - ts)

        ts = time.time()
        # scale up sigma to the appropriate range of singular values
        reg_factor = self.sigma_sqrd * Sc_norm
        St_reg_inv = np.inner(Uc, np.inner(np.diag(1.0 / (Sc + reg_factor)), Utc.T).T)
        if self.print_timing:
            print('KernelFisher.fit: St_reg_inv took', time.time() - ts)

        ts = time.time()
        R = np.inner(E_PIsi.T, np.inner(C, np.inner(St_reg_inv, E_PIsi.T).T).T)
        if self.print_timing:
            print('KernelFisher.fit: R took', time.time() - ts)

        ts = time.time()
        Vr, Lr, Vtr, Lr_norm = self.condensed_svd(R, tol=1e-6)
        if self.print_timing:
            print('KernelFisher.fit: Vr, Lr, Vtr took', time.time() - ts)

        ts = time.time()
        #############################################################
        # This capital Z is Upsilon.T * H from equation (22)
        #############################################################
        # Z = np.inner(np.diag(1.0 / np.sqrt(Lr)), np.inner(Vtr, np.inner(E_PIsi.T, np.inner(C, St_reg_inv.T).T).T).T)
        Z = np.inner(np.inner(np.inner(np.inner(np.diag(1.0 / np.sqrt(Lr)), Vtr.T),
                                       E_PIsi), C.T), St_reg_inv)
        Z = (Z.T - (Z.sum(axis=1) / (1.0 * Z.shape[1]))).T
        if self.print_timing:
            print('KernelFisher.fit: Z took', time.time() - ts)

        self.Z = Z
        self.n_components_found_ = Z.shape[0]

        #############################################################
        # This K_mean is (1/n) K*1_n from equation (22)
        #############################################################
        self.K_mean = K_mean

        # print(Z.shape, K_mean.shape, self.n_components_found_)
        self.X_fit_ = X
        return self

    def condensed_svd(self, M, tol=1e-3, store_singular_vals=False):
        U, S, Vt = linalg.svd(M, full_matrices=False)
        if store_singular_vals:
            self.singular_vals = S

        # We want a tolerance on the fraction of variance in a singular value;
        # when not norm_covariance, the singular values need to be normalized.
        S_norm = np.sum(S)
        rank = np.sum((S / S_norm) > tol)

        return U[:, :rank], S[:rank], Vt[:rank, :], S_norm

    @property
    def classes(self):
        warnings.warn("KernelFisher.classes is deprecated and will be removed in 0.14. "
                      "Use .classes_ instead.", DeprecationWarning, stacklevel=2)
        return self.classes_

    def _decision_function(self, X):
        # X = np.asarray(X)
        return self.transform(X)

    def decision_function(self, X):
        """Return the decision function values related to each class for an
        array of test vectors X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
            Decision function values related to each class, per sample.
            n_components_found_ is the number of components requested and
            found. NOTE: currently identical to self.transform(X).
        """
        return self._decision_function(X)

    def transform(self, X):
        """Project the data so as to maximize class separation (large
        separation between projected class means and small variance within
        each class).

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        # X = np.asarray(X)
        # ts = time.time()
        k = self._get_kernel(X, self.X_fit_)
        # if self.print_timing: print('KernelFisher.transform: k took', time.time() - ts)

        # ts = time.time()
        z = np.inner(self.Z, (k - self.K_mean)).T
        # if self.print_timing: print('KernelFisher.transform: z took', time.time() - ts)

        return z

    def fit_transform(self, X, y, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3):
        """Fit the Fisher Discriminant model according to the given training
        data and parameters, then project the data onto up to
        n_components_found_ directions so as to maximize class separation
        (large separation between projected class means and small variance
        within each class).

        NOTE: this function is not clever; it simply runs
        fit(X, y).transform(X).

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        y : array, shape = [n_samples]
            Target values (integers)

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        # fit() takes only (X, y), so the keyword options are set on the
        # instance before fitting
        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        return self.fit(X, y).transform(X)
def cls(mkl):
    for data in datasets:
        print("####################")
        print('# ', data)
        print("####################")
        # consider labels with more than 2%
        t = 0.02
        datadir = '../data/'
        km_dir = datadir + data + "/"

        if data == 'Fingerprint':
            kernels = ['PPKr', 'NB', 'CP2', 'NI', 'LB', 'CPC', 'RLB', 'LC',
                       'LI', 'CPK', 'RLI', 'CSC']
            km_list = []
            y = np.loadtxt(km_dir + "y.txt", ndmin=2)
            p = np.sum(y == 1, 0) / float(y.shape[0])
            y = y[:, p > t]
            for k in kernels:
                km_f = datadir + data + ("/%s.txt" % k)
                km_list.append(normalize_km(np.loadtxt(km_f)))
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(km_list, y, mkl, 5, data, data)
            np.savetxt(pred_f, pred, fmt="%d")
        elif data == 'plant' or data == 'psortPos' or data == 'psortNeg':
            y = loadmat(km_dir + "label_%s.mat" % data)['y'].ravel()
            km_list = []
            fs = commands.getoutput('ls %skern\;substr*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            fs = commands.getoutput('ls %skern\;phylpro*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            fs = commands.getoutput('ls %skm_evalue*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            n_samples = y.shape[0]
            n_km = len(km_list)
            y_pred = np.zeros(n_samples)
            n_labels = 1
            tags = np.loadtxt("../data/cv/" + data + ".cv")
            for fold in range(1, 6):
                test_ind = np.where(tags == fold)[0]
                train_ind = np.where(tags != fold)[0]
                train_km_list = []
                test_km_list = []
                train_y = y[train_ind]
                test_y = y[test_ind]
                n_train = len(train_ind)
                n_test = len(test_ind)
                w_f = "../svm_result/weights/" + data + "_fold_%d_%s.weights" % (fold, mkl)
                if mkl == 'UNIMKL':
                    w = UNIMKL(n_km, n_labels).ravel()
                else:
                    w = np.loadtxt(w_f, ndmin=2).ravel()
                normw = np.linalg.norm(w, 2, 0)
                uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
                if normw == 0:
                    w = uni
                else:
                    w = w / normw
                for km in km_list:
                    kc = KernelCenterer()
                    train_km = km[np.ix_(train_ind, train_ind)]
                    test_km = km[np.ix_(test_ind, train_ind)]
                    # center train and test kernels
                    kc.fit(train_km)
                    train_km_c = kc.transform(train_km)
                    test_km_c = kc.transform(test_km)
                    train_km_list.append(train_km_c)
                    test_km_list.append(test_km_c)
                train_ckm = np.zeros((n_train, n_train))
                for t in range(n_km):
                    train_ckm = train_ckm + w[t] * train_km_list[t]
                test_ckm = np.zeros(test_km_list[0].shape)
                for t in range(n_km):
                    test_ckm = test_ckm + w[t] * test_km_list[t]
                C_range = [0.01, 0.1, 1, 10, 100]
                param_grid = dict(C=C_range)
                cv = StratifiedShuffleSplit(train_y, n_iter=5, test_size=0.2,
                                            random_state=42)
                grid = GridSearchCV(SVC(kernel='precomputed'),
                                    param_grid=param_grid, cv=cv)
                grid.fit(train_ckm, train_y)
                bestC = grid.best_params_['C']
                svm = SVC(kernel='precomputed', C=bestC)
                svm.fit(train_ckm, train_y)
                y_pred[test_ind] = svm.predict(test_ckm)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            np.savetxt(pred_f, y_pred, fmt="%d")
        elif data in image_datasets:
            y = np.loadtxt(km_dir + "y.txt", ndmin=2)
            p = np.sum(y == 1, 0) / float(y.shape[0])
            y = y[:, p > t]
            linear_km_list = []
            for i in range(1, 16):
                name = 'kernel_linear_%d.txt' % i
                km_f = km_dir + name
                km = np.loadtxt(km_f)
                # normalize the input kernel
                linear_km_list.append(normalize_km(km))
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(linear_km_list, y, mkl, 5, data, data)
            np.savetxt(pred_f, pred, fmt="%d")
        elif data == 'SPAMBASE':
            y = np.loadtxt(km_dir + "y.txt", ndmin=2)
            rbf_km_list = []
            gammas = [2**-9, 2**-8, 2**-7, 2**-6, 2**-5, 2**-4, 2**-3]
            X = np.loadtxt(km_dir + "x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                rbf_km_list.append(km)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(rbf_km_list, y, mkl, 5, data, data)
            np.savetxt(pred_f, pred, fmt="%d")
        else:
            rbf_km_list = []
            gammas = [2**-13, 2**-11, 2**-9, 2**-7, 2**-5, 2**-3, 2**-1, 2**1, 2**3]
            X = np.loadtxt(km_dir + "x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            y = np.loadtxt(km_dir + "y.txt")
            p = np.sum(y == 1, 0) / float(y.shape[0])
            y = y[:, p > t]
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                # normalize the input kernel
                rbf_km_list.append(km)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(rbf_km_list, y, mkl, 5, data, data)
            np.savetxt(pred_f, pred, fmt="%d")