def test_nystroem_default_parameters():
    """Check the implicit ``gamma`` Nystroem uses for the rbf and chi2 kernels."""
    rng = np.random.RandomState(42)
    X = rng.uniform(size=(10, 4))

    # With no explicit gamma, the default (rbf) feature map must reproduce
    # rbf_kernel(X, gamma=None), i.e. gamma = 1 / n_features.
    rbf_features = Nystroem(n_components=10).fit_transform(X)
    assert_array_almost_equal(
        rbf_kernel(X, gamma=None),
        np.dot(rbf_features, rbf_features.T),
    )

    # For the chi2 kernel the implicit gamma must behave like gamma=1.
    chi2_features = Nystroem(kernel='chi2', n_components=10).fit_transform(X)
    assert_array_almost_equal(
        chi2_kernel(X, gamma=1),
        np.dot(chi2_features, chi2_features.T),
    )
def test_nystroem_poly_kernel_params():
    """Non-regression: Nystroem should pass other parameters beside gamma.

    Both the "random" and the "k_means" basis methods are checked against
    the exact polynomial kernel, using as many components as samples.
    """
    rng = np.random.RandomState(37)
    X = rng.uniform(size=(10, 4))
    expected = polynomial_kernel(X, degree=3.1, coef0=.1)

    # Settings shared by both estimators; only basis_method differs.
    shared = dict(kernel="polynomial", n_components=X.shape[0],
                  degree=3.1, coef0=.1)
    nystroem_random = Nystroem(basis_method="random", **shared)
    nystroem_k_means = Nystroem(basis_method="k_means", **shared)

    # Fit order (k_means first, then random) is kept as in the original.
    features_k_means = nystroem_k_means.fit_transform(X)
    features_random = nystroem_random.fit_transform(X)

    assert_array_almost_equal(
        np.dot(features_k_means, features_k_means.T), expected)
    assert_array_almost_equal(
        np.dot(features_random, features_random.T), expected)
def test_nystroem_poly_kernel_params():
    """Non-regression: Nystroem should pass other parameters beside gamma."""
    rng = np.random.RandomState(37)
    X = rng.uniform(size=(10, 4))

    # Exact polynomial kernel used as the reference.
    expected = polynomial_kernel(X, degree=3.1, coef0=.1)

    # One component per sample, with the same degree / coef0 as the reference.
    approximator = Nystroem(
        kernel="polynomial",
        n_components=X.shape[0],
        degree=3.1,
        coef0=.1,
    )
    features = approximator.fit_transform(X)

    assert_array_almost_equal(np.dot(features, features.T), expected)
class SparseKernelClassifier(CDClassifier):
    """L1-penalised squared-hinge classifier trained on kernel features.

    The input is mapped either to the kernel matrix against the training
    set (``mode='exact'``) or to a Nystroem feature map (any other ``mode``
    value); the resulting matrix is what the parent ``CDClassifier`` is
    fitted on and asked for decision values.

    Parameters
    ----------
    mode : str
        ``'exact'`` computes the kernel with ``pairwise_kernels``; any other
        value selects the Nystroem approximation.
    kernel : str or callable
        Passed as ``metric`` to ``pairwise_kernels`` or as ``kernel`` to
        ``Nystroem``.
    gamma : float
        Kernel ``gamma`` passed to either kernel computation.
    C : float
        Passed to the parent constructor as both ``C`` and ``Cd``.
    alpha : float
        Passed to the parent constructor.
    n_components : int
        Number of Nystroem components (unused in exact mode).
    n_jobs : int
        Passed to the parent constructor.
    verbose : bool
        Passed to the parent constructor.
    """

    def __init__(self, mode='exact', kernel='rbf', gamma=1e-3, C=1, alpha=1,
                 n_components=500, n_jobs=1, verbose=False):
        self.mode = mode
        self.kernel = kernel
        self.gamma = gamma
        self.C = C
        self.alpha = alpha
        self.n_components = n_components
        self.n_jobs = n_jobs
        self.verbose = verbose
        super(SparseKernelClassifier, self).__init__(
            C=C,
            alpha=alpha,
            loss='squared_hinge',
            penalty='l1',
            multiclass=False,
            debiasing=True,
            Cd=C,
            warm_debiasing=True,
            n_jobs=n_jobs,
            # Forward the caller's choice. The previous code hard-coded
            # ``verbose=False`` here, so the ``verbose`` argument was
            # accepted but never handed to the parent.
            verbose=verbose,
        )

    def fit(self, X, y):
        """Build the kernel representation of ``X`` and fit the parent model.

        In exact mode the training data is stored as ``X_train_`` so that
        ``decision_function`` can evaluate the kernel against it; otherwise
        the fitted Nystroem sampler is stored as ``kernel_sampler_``.

        Returns
        -------
        self
        """
        if self.mode == 'exact':
            K = pairwise_kernels(
                X, metric=self.kernel, filter_params=True, gamma=self.gamma
            )
            self.X_train_ = X
        else:
            self.kernel_sampler_ = Nystroem(
                kernel=self.kernel,
                gamma=self.gamma,
                n_components=self.n_components,
            )
            K = self.kernel_sampler_.fit_transform(X)
        super(SparseKernelClassifier, self).fit(K, y)
        return self

    def decision_function(self, X):
        """Return the parent's decision values on the kernel features of ``X``.

        Uses the kernel between ``X`` and the stored training data in exact
        mode, and the fitted Nystroem transform otherwise.
        """
        if self.mode == 'exact':
            K = pairwise_kernels(
                X, self.X_train_, metric=self.kernel, filter_params=True,
                gamma=self.gamma
            )
        else:
            K = self.kernel_sampler_.transform(X)
        return super(SparseKernelClassifier, self).decision_function(K)
def test_nystroem_vs_sklearn():
    """Compare ``Nystroem`` with the reference ``NystroemR`` on a linear kernel.

    ``Nystroem`` is given a one-element list of arrays and its first output
    is compared with what ``NystroemR`` returns for the bare array.
    """
    np.random.seed(42)
    X = np.random.randn(100, 5)

    wrapped = Nystroem(kernel='linear', random_state=42)
    reference = NystroemR(kernel='linear', random_state=42)

    wrapped_features = wrapped.fit_transform([X])[0]
    reference_features = reference.fit_transform(X)

    assert_array_almost_equal(wrapped_features, reference_features)
class WeightedSparseKernelClassifier(LinearSVC):
    """L1-penalised squared-hinge ``LinearSVC`` trained on kernel features.

    The input is mapped either to the kernel matrix against the training
    set (``mode='exact'``) or to a Nystroem feature map (any other ``mode``
    value); the parent ``LinearSVC`` is fitted on that matrix.

    Parameters
    ----------
    mode : str
        ``'exact'`` computes the kernel with ``pairwise_kernels``; any other
        value selects the Nystroem approximation.
    kernel : str or callable
        Passed as ``metric`` to ``pairwise_kernels`` or as ``kernel`` to
        ``Nystroem``.
    gamma : float
        Kernel ``gamma`` passed to either kernel computation.
    C : float
        Passed to ``LinearSVC``.
    multi_class : str
        Passed to ``LinearSVC`` unchanged.
    class_weight : str, dict or None
        Passed to ``LinearSVC`` unchanged, so it must be a value accepted by
        the installed scikit-learn.
        NOTE(review): the default ``'auto'`` is the legacy spelling; recent
        scikit-learn releases expect ``'balanced'`` -- confirm against the
        version this project pins.
    n_components : int
        Number of Nystroem components (unused in exact mode).
    verbose : bool
        Passed to ``LinearSVC``.
    """

    def __init__(
        self, mode='exact', kernel='rbf', gamma=1e-3, C=1,
        multi_class='ovr', class_weight='auto', n_components=5000,
        verbose=False
    ):
        self.mode = mode
        self.kernel = kernel
        self.gamma = gamma
        self.C = C
        self.multi_class = multi_class
        self.class_weight = class_weight
        self.n_components = n_components
        self.verbose = verbose
        # multi_class and class_weight are forwarded explicitly. Previously
        # they were only stored on ``self`` before this call and not passed
        # on, so the parent constructor was free to replace them with its
        # own defaults and the requested class weighting was lost.
        super(WeightedSparseKernelClassifier, self).__init__(
            C=C,
            loss='squared_hinge',
            penalty='l1',
            dual=False,
            multi_class=multi_class,
            class_weight=class_weight,
            verbose=verbose,
        )

    def fit(self, X, y):
        """Build the kernel representation of ``X`` and fit ``LinearSVC`` on it.

        In exact mode the training data is stored as ``X_train_``; otherwise
        the fitted Nystroem sampler is stored as ``kernel_sampler_``.

        Returns whatever the parent ``fit`` returns.
        """
        if self.mode == 'exact':
            K = pairwise_kernels(
                X, metric=self.kernel, filter_params=True, gamma=self.gamma
            )
            self.X_train_ = X
        else:
            self.kernel_sampler_ = Nystroem(
                kernel=self.kernel,
                gamma=self.gamma,
                n_components=self.n_components,
            )
            K = self.kernel_sampler_.fit_transform(X)
        return super(WeightedSparseKernelClassifier, self).fit(K, y)

    def decision_function(self, X):
        """Return the parent's decision values on the kernel features of ``X``.

        Uses the kernel between ``X`` and the stored training data in exact
        mode, and the fitted Nystroem transform otherwise.
        """
        if self.mode == 'exact':
            K = pairwise_kernels(
                X, self.X_train_, metric=self.kernel, filter_params=True,
                gamma=self.gamma
            )
        else:
            K = self.kernel_sampler_.transform(X)
        return super(WeightedSparseKernelClassifier, self).decision_function(K)
class LSH():
    """Chain an rbf Nystroem feature map with a ``discreteLSH`` transform."""

    def __init__(self, r=0.1, num_functions=50, dimensionality=128, gamma=1):
        """Create both feature maps from the given settings."""
        self._build_maps(r, num_functions, dimensionality, gamma)

    def set_params(self, r=0.1, num_functions=50, dimensionality=128, gamma=1):
        """Replace both feature maps with new ones built from the settings."""
        self._build_maps(r, num_functions, dimensionality, gamma)

    def _build_maps(self, r, num_functions, dimensionality, gamma):
        # Shared by __init__ and set_params: (re)create the two stages.
        # ``dimensionality`` is both the third discreteLSH argument and the
        # number of Nystroem components.
        self.feature_map_LSH = discreteLSH(r, num_functions, dimensionality)
        self.feature_map_nystroem = Nystroem(
            kernel='rbf',
            gamma=gamma,
            n_components=dimensionality,
        )

    def transform(self, X):
        """Return the LSH transform of the Nystroem features of ``X``.

        The Nystroem map is fitted on ``X`` on every call (``fit_transform``)
        before its output is passed to the LSH stage.
        """
        nystroem_features = self.feature_map_nystroem.fit_transform(X)
        return self.feature_map_LSH.transform(nystroem_features)
def ApplyNystroemOnKernelMatrix(x, kernelFn, nComponents):
    """
    Compute the Nystroem approximation of the uncentered kernel matrix of a
    data matrix (rows are observations, columns are variables).

    :param x: numpy matrix. Data matrix.
    :param kernelFn: callable function. Returned by calling KernelSelector().
    :param nComponents: integer. Number of ranks retained in Nystroem method.
    :return numpy matrix.
    """
    approximator = Nystroem(kernelFn, n_components=nComponents)
    features = approximator.fit_transform(x)
    # Wrap the result as np.matrix, as the callers expect.
    return np.matrix(features)
class LocalitySensitiveHash():
    """Apply an rbf Nystroem map, then a ``DiscreteLocalitySensitiveHash``."""

    def __init__(self, r=0.1, num_functions=50, dimensionality=128, gamma=1):
        """Build the hashing stage and the Nystroem stage.

        ``dimensionality`` is used both as the third argument of the hashing
        stage and as the number of Nystroem components.
        """
        self.feature_map_LSH = DiscreteLocalitySensitiveHash(
            r, num_functions, dimensionality
        )
        self.feature_map_nystroem = Nystroem(
            n_components=dimensionality, gamma=gamma, kernel='rbf'
        )

    def set_params(self, r=0.1, num_functions=50, dimensionality=128, gamma=1):
        """Discard the current stages and rebuild both from the settings."""
        self.feature_map_LSH = DiscreteLocalitySensitiveHash(
            r, num_functions, dimensionality
        )
        self.feature_map_nystroem = Nystroem(
            n_components=dimensionality, gamma=gamma, kernel='rbf'
        )

    def transform(self, data_matrix):
        """Return the hash transform of the Nystroem features of the input.

        The Nystroem stage is fitted on ``data_matrix`` on each call.
        """
        nystroem_output = self.feature_map_nystroem.fit_transform(data_matrix)
        return self.feature_map_LSH.transform(nystroem_output)
def sk_nystrom():
    """Fit a LinearSVC on Nystroem features of the digits data and print results.

    Loads the digits data set restricted to nine classes, divides the
    features by 16, maps them through a Nystroem feature map (gamma=0.2,
    300 components, random_state=1), fits a ``LinearSVC`` on the mapped
    data and prints the classifier, its training score and the mapped
    data. Returns ``None``.
    """
    clf = svm.LinearSVC()
    print(clf)

    X, y = datasets.load_digits(n_class=9, return_X_y=True)  # 1617 samples
    print(len(X))

    data = X / 16.
    print(data)

    feature_map_nystroem = Nystroem(gamma=.2, random_state=1, n_components=300)
    # Fit on the scaled data computed above. The previous code passed an
    # undefined name (``sample``) here, which raises NameError unless a
    # global of that name happens to exist.
    data_transformed = feature_map_nystroem.fit_transform(data)

    print(clf.fit(data_transformed, y))
    # Score on the same data the classifier was fitted on (training score).
    print(clf.score(data_transformed, y))
    print(data_transformed)
def gram_Nystroem(self, x, nComponents):
    """
    Nystroem approximation of the kernel matrix given data. No centering.

    The kernel used is ``self.fn``.

    :type x: 2d array, with size n * p
    :param x: data matrix for the covariates belonging to the same group,
        associated with the given matrix.

    :type nComponents: int
    :param nComponents: number of rank to retain

    :return: approximated kernel matrix with reduced rank, with size
        n * nComponents
    """
    return Nystroem(self.fn, n_components=nComponents).fit_transform(x)
class LocalitySensitiveHash():
    """Two-stage transform: rbf Nystroem features, then a discrete LSH."""

    def __init__(self, r=0.1, num_functions=50, dimensionality=128, gamma=1):
        """Create both stages from the given settings."""
        self._create_feature_maps(r, num_functions, dimensionality, gamma)

    def set_params(self, r=0.1, num_functions=50, dimensionality=128, gamma=1):
        """Rebuild both stages from scratch with the given settings."""
        self._create_feature_maps(r, num_functions, dimensionality, gamma)

    def _create_feature_maps(self, r, num_functions, dimensionality, gamma):
        # Common construction code for __init__ and set_params; the hashing
        # stage is created first, then the Nystroem stage, as before.
        self.feature_map_LSH = DiscreteLocalitySensitiveHash(
            r, num_functions, dimensionality)
        self.feature_map_nystroem = Nystroem(
            kernel='rbf', gamma=gamma, n_components=dimensionality)

    def transform(self, data_matrix):
        """Fit the Nystroem stage on ``data_matrix`` and hash its output."""
        kernel_features = self.feature_map_nystroem.fit_transform(data_matrix)
        return self.feature_map_LSH.transform(kernel_features)
def test_lndmrk_nystroem_approximation():
    """Compare LandmarkNystroem with NystroemR, with and without landmarks.

    Before landmarks are assigned the output must match ``NystroemR``;
    after assigning them, the test requires that not every entry differs
    from the ``NystroemR`` output by more than 1e-6.
    """
    np.random.seed(42)
    X = np.random.randn(100, 5)

    # Candidate landmark rows: the unique indices appearing in either the
    # tail (from index 5 on) or an equally long head of the sample range.
    sample_ids = np.arange(X.shape[0])
    tail_ids = sample_ids[5:]
    head_ids = sample_ids[:tail_ids.shape[0]]
    lndmrks = X[np.unique((tail_ids, head_ids))]

    kernel = LandmarkNystroem(kernel='rbf', random_state=42)
    kernelR = NystroemR(kernel='rbf', random_state=42)

    without_landmarks = kernel.fit_transform([X])[0]
    kernel.landmarks = lndmrks
    with_landmarks = kernel.fit_transform([X])[0]
    reference = kernelR.fit_transform(X)

    assert_array_almost_equal(reference, without_landmarks)
    exceeds_tolerance = (np.abs(reference - with_landmarks) > 1E-6).flatten()
    assert not all(exceeds_tolerance)
def compute_kernel(self, X, Y=None, gamma=None, *args, **kwargs):
    """Return the Nystroem feature map of ``X``.

    ``gamma`` is estimated from ``X`` with ``estimate_gamma`` when it is
    ``None``. The kernel, number of components, ``coef0``, ``degree`` and
    random state come from this object; extra positional and keyword
    arguments are forwarded to ``Nystroem``. ``Y`` is accepted but not used.
    """
    if gamma is None:
        gamma = estimate_gamma(X)

    # Settings taken from this object plus the resolved gamma.
    sampler_options = dict(
        gamma=gamma,
        kernel=self.kernel,
        n_components=self.n_components,
        coef0=self.coef0,
        degree=self.degree,
        random_state=self.random_state,
    )
    approximator = Nystroem(*args, **sampler_options, **kwargs)

    # Fit on X and return its transformed representation.
    return approximator.fit_transform(X)
def train(self, train_x, train_y, valid_x=None, valid_y=None):
    """Train the model.

    Args:
        train_x (pd.DataFrame of [n_samples, n_features]): training features.
        train_y (1-D array-like shape of [n_samples]): training labels.
        valid_x (array-like shape of [n_samples, n_features]): validation
            features; not used by this method.
        valid_y (1-D array-like shape of [n_samples]): validation labels;
            not used by this method.
    """
    # --- Scaling ---
    # Default to scaling every column when none were specified.
    if self.features_to_scale is None:
        self.features_to_scale = train_x.columns
    scale_cols = self.features_to_scale

    # Fit the scaler on the selected columns.
    scaler = StandardScaler()
    scaler.fit(train_x[scale_cols])

    # Apply the scaling. NOTE: this writes the scaled values back into the
    # DataFrame passed in by the caller.
    train_x.loc[:, scale_cols] = scaler.transform(train_x[scale_cols])

    # Kernel approximation (there are many features, so a plain SVM would
    # be too slow).
    kernel_mapper = Nystroem(
        gamma=.2,
        random_state=config.RANDOM_SEED,
        n_components=300,
    )
    train_x_mapped = kernel_mapper.fit_transform(train_x)

    # Build and fit the model.
    # NOTE(review): the original comment says probability=True is always
    # needed (otherwise no probabilities are returned); nothing here sets
    # it, so it has to come from self.params -- confirm.
    model = SVC(**self.params).fit(train_x_mapped, train_y)

    # Keep the fitted model, kernel mapper and scaler on the instance.
    self.model = model
    self.kernel_mapper = kernel_mapper
    self.scaler = scaler
class KernelRegressionPolicy(UpperLevelPolicy):
    """Linear policy in approximated kernel space.

    A linear policy in kernel space is learned. To limit computation and
    the risk of overfitting, a low-dimensional approximation of the kernel
    space is used, obtained with the Nystroem approximation. The explicit
    feature map is therefore learned from the training data, which makes
    the features adaptive in contrast to predefined feature maps.

    Parameters
    ----------
    weight_dims: int
        dimensionality of weight vector of lower-level policy

    context_dims: int
        dimensionality of context vector

    kernel : string or callable (default: "rbf")
        Kernel map to be approximated. A callable should accept two
        arguments and the keyword arguments passed to this object as
        kernel_params, and should return a floating point number.

    gamma : float (default: None)
        Gamma parameter for the RBF, polynomial, exponential chi2 and
        sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for sklearn.metrics.pairwise.
        Ignored by other kernels.

    coef0 : float (default: 1.5)
        The coef0 parameter for the kernels. Interpretation of the value
        is left to the kernel; see the documentation for
        sklearn.metrics.pairwise. Ignored by other kernels.

    n_components: int (default: 20)
        The number of components used in the Nystroem approximation of the
        kernel

    covariance_scale: float (default: 1.0)
        the covariance is initialized to
        numpy.eye(weight_dims) * covariance_scale.

    alpha: float (default: 0.0)
        Controlling the L2-regularization in the ridge regression for
        learning of the policy's weights

    bias: bool (default: True)
        Whether a constant bias dimension is added to the approximated
        kernel space. This allows learning offsets more easily.

    normalize: bool (default: True)
        Whether the activations in the approximated kernel space are
        normalized. This should improve generalization beyond the
        boundaries of the observed context space.

    random_state : optional, int
        Seed for the random number generator.

    :Author: Jan Hendrik Metzen ([email protected])
    :Created: 2014/11/20
    """

    def __init__(
        self,
        weight_dims,
        context_dims,
        kernel="rbf",
        gamma=None,
        coef0=1.5,
        n_components=20,
        covariance_scale=1.0,
        alpha=0.0,
        bias=True,
        normalize=True,
        random_state=None,
    ):
        self.weight_dims = weight_dims
        self.context_dims = context_dims

        # Kernel approximation settings.
        self.kernel = kernel
        self.gamma = gamma
        self.coef0 = coef0
        self.n_components = n_components

        # Regression / feature post-processing settings.
        self.alpha = alpha
        self.bias = bias
        self.normalize = normalize

        # Covariance used when sampling weight vectors in __call__.
        self.Sigma = np.eye(weight_dims) * covariance_scale

        self.random_state = check_random_state(random_state)

    def _postprocess_features(self, features):
        # Shared by __call__ and fit: optionally append a constant bias
        # column, then optionally divide each row by the sum of its
        # absolute values.
        if self.bias:
            features = np.hstack((features, np.ones((features.shape[0], 1))))
        if self.normalize:
            features /= np.abs(features).sum(1)[:, None]
        return features

    def __call__(self, context, explore=True):
        """Evaluates policy for given contexts.

        Samples a weight vector from the distribution if explore is true,
        otherwise returns the distribution's mean (which depends on the
        context). Only the result for the first context row is returned.

        Parameters
        ----------
        contexts: array-like, [n_contexts, context_dims]
            context vector

        explore: bool
            if true, weight vector is sampled from distribution. otherwise
            the distribution's mean is returned
        """
        X = self._postprocess_features(self.nystroem.transform(context))
        mean = np.dot(X, self.W.T)

        if not explore:
            return mean[0]

        def draw_sample(row_mean):
            # One draw from a normal centred on this row's mean.
            return self.random_state.multivariate_normal(
                row_mean, self.Sigma, size=[1])[0]

        return np.apply_along_axis(draw_sample, 1, mean)[0]

    def fit(self, X, Y, weights=None, context_transform=True):
        """Trains policy by weighted maximum likelihood.

        .. note:: This call changes this policy (self)

        Parameters
        ----------
        X: array-like, shape (n_samples, context_dims)
            Context vectors

        Y: array-like, shape (n_samples, weight_dims)
            Low-level policy parameter vectors

        weights: array-like, shape (n_samples,)
            Weights of individual samples (should depend on the obtained
            reward)

        context_transform: bool
            Accepted but not used by this method.
        """
        # Kernel approximation; never request more components than samples.
        n_components = np.minimum(X.shape[0], self.n_components)
        self.nystroem = Nystroem(
            kernel=self.kernel,
            gamma=self.gamma,
            coef0=self.coef0,
            n_components=n_components,
            random_state=self.random_state,
        )
        self.X = self._postprocess_features(self.nystroem.fit_transform(X))

        # Standard ridge regression without intercept; the third positional
        # argument of Ridge.fit receives the sample weights.
        ridge = Ridge(alpha=self.alpha, fit_intercept=False)
        ridge.fit(self.X, Y, weights)
        self.W = ridge.coef_
# Time the random Fourier feature approximation of the rbf kernel
# (gamma=1) and build the kernel estimate from its features.
start_time = time.time()
RFF = RBFSampler(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = RFF.fit_transform(X)
RFF_estimated_kernel = V.dot(V.T)
print("--- RFF Time : %s seconds ---" % (time.time() - start_time))

# Same measurement for the Nystroem approximation, with the same number
# of components.
start_time = time.time()
N = Nystroem(gamma=1, n_components=int(num_of_samples * sampling_percentage))
V = N.fit_transform(X)
estimated_kernel = V.dot(V.T)
print("--- Nystrom Time : %s seconds ---" % (time.time() - start_time))

# Exact rbf kernel for reference.
start_time = time.time()
real_kernel = sklearn.metrics.pairwise.rbf_kernel(X, gamma=1)
print("--- Real Time : %s seconds ---" % (time.time() - start_time))

# Show the top-left 5x5 corner of each matrix for a visual comparison.
# The print statements were converted to the single-argument call form,
# which prints the same under Python 2 and is valid under Python 3.
print(estimated_kernel[0:5, 0:5])
print('\n\n')
print(real_kernel[0:5, 0:5])
print('\n\n')
print(RFF_estimated_kernel[0:5, 0:5])