class GPSamplePath(Function):
    """1-D synthetic objective: a random sample path from an (approximate)
    GP, represented with random Fourier features."""

    def __init__(self, seed=1):
        self.dim = 1
        self.bounds = [[-3, 3]]
        self.y_bounds = [-2, 2]
        super().__init__(self.dim, self.bounds, seed)
        self.fit()
        self.min, self.max = self.get_min_max()
        # locate the optimum of the standardized path with a DIRECT-style
        # global optimizer (maxf/algmethod as in scipydirect.minimize)
        res = minimize(self.value_std, self.bounds,
                       maxf=self.dim * 1000, algmethod=1)
        self.x_opt = res['x'][0]
        self.y_opt = -self.value_std(self.x_opt)

    def base_function(self, x):
        res = (6 * x - 2)**2 * np.sin(12 * x - 4)
        return res

    def fit(self):
        # fit least-squares weights on random RBF features of 3 anchor points
        X = np.linspace(self.bounds[0][0], self.bounds[0][1], 3)
        Y = np.random.uniform(self.y_bounds[0], self.y_bounds[1], 3)
        X = X.reshape(-1, 1)
        self.rbf_feature = RBFSampler(gamma=1, n_components=30)
        self.rbf_feature.fit(np.atleast_2d(X[0]))  # only sets the input dim
        phi_X = self.rbf_feature.transform(X)
        self.w = np.linalg.inv(phi_X.T.dot(phi_X)).dot(phi_X.T).dot(Y)

    def get_min_max(self):
        X = np.linspace(self.bounds[0][0], self.bounds[0][1], 10000)
        Y = self.value(X)
        return np.min(Y), np.max(Y)

    def value(self, x):
        x = x.reshape(-1, 1)
        res = self.rbf_feature.transform(x).dot(self.w)
        return res

    def value_std(self, x):
        # rescale values to [0, 1] using the precomputed min/max
        res = self.value(x)
        res = (res - self.min) / (self.max - self.min)
        return res

    def get_pool(self, K):
        return np.linspace(self.bounds[0][0], self.bounds[0][1], K)

    def plot(self):
        x_range = np.linspace(self.bounds[0][0], self.bounds[0][1], 100)
        y = self.value_std(x_range)
        plt.plot(x_range, y)
        plt.show()
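# A minimal usage sketch for the class above (hypothetical; it assumes the
# repo's Function base class and a scipydirect-style `minimize` are available,
# plus numpy as np and matplotlib.pyplot as plt):
f = GPSamplePath(seed=3)
pool = f.get_pool(K=50)       # 50 equally spaced candidates on [-3, 3]
vals = f.value_std(pool)      # sample-path values rescaled to [0, 1]
print(f.x_opt, f.y_opt)       # optimum located by DIRECT at construction time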
class RKHSfunction():
    """RKHS function represented with random Fourier features; easier to use
    than an exact kernel expansion."""

    def __init__(self, kernel_gamma, seed=1, n_feat=96):
        self.kernel_gamma = kernel_gamma
        self.n_feat = n_feat
        # random features: the parameters of this model are the weights,
        # i.e., the decision variables
        self.model = nn.Sequential(
            Flatten(),
            nn.Linear(n_feat, 1, bias=True)
        )
        self.seed = seed
        # only Gaussian RKHS is supported for now
        self.rbf_feature = RBFSampler(gamma=kernel_gamma,
                                      n_components=n_feat,
                                      random_state=seed)

    def eval(self, X, fit=False):
        x_reshaped = X.view(X.shape[0], -1)
        if fit:
            self.rbf_feature.fit(x_reshaped)
        if not x_reshaped.requires_grad:
            # no gradient needed: only transform during evaluation
            x_feat = self.rbf_feature.transform(x_reshaped)
            rkhsF = self.model(torch.from_numpy(x_feat).float())
        else:
            # differentiable path: fit on detached data, then apply a
            # PyTorch re-implementation of the transform
            x_detach = x_reshaped.detach()
            x_fitted = self.rbf_feature.fit(x_detach, y=None)
            x_feat = pth_transform(x_fitted, x_reshaped)
            # assert torch.max(x_feat.detach() -
            #                  self.rbf_feature.fit_transform(x_detach)) == 0
            # (would not hold exactly: there's randomness, of course)
            rkhsF = self.model(x_feat)[:, 0]
        return rkhsF

    def norm(self):
        return computeRKHSNorm(self.model)

    def set_seed(self, seed):
        # reset the random-feature seed, e.g. in doubly stochastic SGD
        self.seed = seed
        self.rbf_feature = RBFSampler(gamma=self.kernel_gamma,
                                      n_components=self.n_feat,
                                      random_state=seed)

    def __call__(self, X, fit=False, random_state=False):
        if random_state is True:
            # random seed reset for doubly stochastic gradients
            self.set_seed(seed=np.random)
        return self.eval(X, fit)
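# Hypothetical usage of RKHSfunction (torch is assumed imported as in the
# class above; Flatten, pth_transform and computeRKHSNorm are helpers assumed
# to come from the same repo):
f = RKHSfunction(kernel_gamma=0.5, seed=7, n_feat=96)
X = torch.randn(32, 8)        # any input dimension; RF maps it to n_feat
y = f(X, fit=True)            # fit the random features, then evaluate f(X)
print(y.shape, f.norm())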
def computeKernelMatrix(self, Graphs):
    print "Computing gram matrix"
    # Preprocessing step: approximation of the RBF kernel with explicit
    # random features. Adds a field "veclabel_rbf" to every node.
    labels = set()
    for g in Graphs:
        for _, d in g.nodes(data=True):
            labels.add(tuple(d['veclabel']))
    labels_list = [list(l) for l in labels]
    print "Size of labels matrix:", len(labels_list), len(labels_list[0])
    feature_map_fourier = RBFSampler(gamma=(1.0 / len(labels_list[0])),
                                     random_state=1,
                                     n_components=self.n_comp)
    # feature_map_fourier = Nystroem(gamma=(1.0 / len(labels_list[0])),
    #                                random_state=1, n_components=250)
    feature_map_fourier.fit(labels_list)
    for g in Graphs:
        for n, d in g.nodes(data=True):
            # transform expects a 2D array; wrap the single vector label
            g.node[n]['veclabel_rbf'] = feature_map_fourier.transform(
                [d['veclabel']])[0]
    print "RBF approximation finished."
    Gram = np.empty(shape=(len(Graphs), len(Graphs)))
    progress = 0
    FeatureMaps = []
    for i in xrange(0, len(Graphs)):
        FeatureMaps.append(
            self.generateGraphFeatureMap(Graphs[i], self.max_radius))
    print "FeatureVectors calculated"
    for i in xrange(0, len(Graphs)):
        for j in xrange(i, len(Graphs)):
            progress += 1
            Gram[i][j] = self._kernelFunctionFeatureVectors(
                FeatureMaps[i], FeatureMaps[j])
            Gram[j][i] = Gram[i][j]
            if progress % 1000 == 0:
                print "k",
                sys.stdout.flush()
            elif progress % 100 == 0:
                print ".",
                sys.stdout.flush()
    return Gram
class Model:
    def __init__(self, grid):
        # fit the featurizer to data
        samples = gather_samples(grid)
        # self.featurizer = Nystroem()
        self.featurizer = RBFSampler()
        self.featurizer.fit(samples)
        dims = self.featurizer.n_components

        # initialize linear model weights
        self.w = np.zeros(dims)

    def predict(self, s):
        x = self.featurizer.transform([s])[0]
        return x @ self.w

    def grad(self, s):
        x = self.featurizer.transform([s])[0]
        return x
def test_classifier_regularization(normalize, loss):
    rng = np.random.RandomState(0)
    transformer = RBFSampler(n_components=100, random_state=0, gamma=10)
    transformer.fit(X)
    X_trans = transformer.transform(X)
    if normalize:
        X_trans = StandardScaler().fit_transform(X_trans)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]
    y_train = np.sign(y_train)
    y_test = np.sign(y_test)

    # overfitting
    clf = SGDClassifier(transformer, max_iter=500, warm_start=True,
                        verbose=False, fit_intercept=True, loss=loss,
                        alpha=0.00001, intercept_decay=1e-10,
                        random_state=0, tol=0, normalize=normalize)
    clf.fit(X_train[:100], y_train[:100])
    train_acc = clf.score(X_train[:100], y_train[:100])
    assert train_acc >= 0.95

    # underfitting
    clf_under = SGDClassifier(transformer, max_iter=100, warm_start=True,
                              verbose=False, fit_intercept=True, loss=loss,
                              alpha=10000, random_state=0,
                              normalize=normalize)
    clf_under.fit(X_train, y_train)
    assert np.sum(clf_under.coef_**2) < np.sum(clf.coef_**2)

    # l1 regularization
    clf_l1 = SGDClassifier(transformer, max_iter=100, warm_start=True,
                           verbose=False, fit_intercept=True, loss=loss,
                           alpha=1000, l1_ratio=0.9, random_state=0,
                           normalize=normalize)
    clf_l1.fit(X_train, y_train)
    assert_almost_equal(np.sum(np.abs(clf_l1.coef_)), 0)
def ridge_gamma(data, log_gamma):
    alpha = 5.0e-07  # sigma^2 / n (alternatives tried: 2.0e-06, 6.25e-07)
    gamma = np.exp(log_gamma)
    print('Training with alpha:{}, gamma:{}'.format(alpha, gamma))
    np.random.seed(23)
    rbf_feature = RBFSampler(gamma=gamma, n_components=200)
    trans_tr_x = rbf_feature.fit_transform(data.train_x)
    trans_test_x = rbf_feature.transform(data.test_x)
    clf = Ridge(alpha=alpha)
    clf.fit(trans_tr_x, data.train_y)
    score = clf.score(trans_test_x, data.test_y)
    return max(score, -1.0)
def test_rbf_sampler():
    """test that RBFSampler approximates kernel on random data"""
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000,
                               random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    assert_array_almost_equal(kernel, kernel_approx, 1)
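# A self-contained variant of the same check, as a rough sketch (X and Y are
# generated locally here rather than taken from the test module's fixtures):
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X, Y = rng.random_sample((40, 4)), rng.random_sample((30, 4))
phi = RBFSampler(gamma=10., n_components=5000, random_state=42)
K_exact = rbf_kernel(X, Y, gamma=10.)
K_approx = phi.fit_transform(X) @ phi.transform(Y).T
print(np.abs(K_exact - K_approx).max())  # shrinks as n_components grows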
class _RBFSamplerImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
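# Sketch of how this wrapper might be exercised, assuming Op has been bound to
# sklearn's RBFSampler (as in the operator-wrapping pattern above):
from sklearn.kernel_approximation import RBFSampler as Op
impl = _RBFSamplerImpl(gamma=1.0, n_components=64, random_state=0)
X = [[0., 0.], [1., 1.], [1., 0.], [0., 1.]]
print(impl.fit(X).transform(X).shape)  # (4, 64)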
def test_regressor_regularization(normalize, loss):
    rng = np.random.RandomState(0)
    transformer = RBFSampler(n_components=100, random_state=0, gamma=10)
    transformer.fit(X)
    X_trans = transformer.transform(X)
    if normalize:
        X_trans = StandardScaler().fit_transform(X_trans)
    y, coef = generate_target(X_trans, rng, -0.1, 0.1)
    y_train = y[:n_train]
    y_test = y[n_train:]

    # overfitting
    clf = SAGARegressor(transformer, max_iter=300, warm_start=True,
                        verbose=False, fit_intercept=True, loss=loss,
                        alpha=0.0001, intercept_decay=1e-6,
                        random_state=0, tol=0, normalize=normalize)
    clf.fit(X_train[:100], y_train[:100])
    l2 = np.mean((y_train[:100] - clf.predict(X_train[:100]))**2)
    assert l2 < 0.01

    # underfitting
    clf_under = SAGARegressor(transformer, max_iter=100, warm_start=True,
                              verbose=False, fit_intercept=True, loss=loss,
                              alpha=100000, random_state=0,
                              normalize=normalize)
    clf_under.fit(X_train, y_train)
    assert np.sum(clf_under.coef_ ** 2) < np.sum(clf.coef_ ** 2)

    # l1 regularization
    clf_l1 = SAGARegressor(transformer, max_iter=100, warm_start=True,
                           verbose=False, fit_intercept=True, loss=loss,
                           alpha=1000, l1_ratio=0.9, random_state=0,
                           normalize=normalize)
    clf_l1.fit(X_train, y_train)
    assert_almost_equal(np.sum(np.abs(clf_l1.coef_)), 0)

    # comparison with sgd
    sgd = SGDRegressor(alpha=0.01, max_iter=100, eta0=1,
                       learning_rate='constant', fit_intercept=True,
                       random_state=0)
    sgd.fit(X_trans[:n_train], y_train)
    test_l2_sgd = np.mean((y_test - sgd.predict(X_trans[n_train:]))**2)

    clf = SAGARegressor(transformer, max_iter=100, warm_start=True,
                        verbose=False, fit_intercept=True, loss=loss,
                        alpha=0.01, random_state=0, normalize=normalize)
    clf.fit(X_train, y_train)
    test_l2 = np.mean((y_test - clf.predict(X_test))**2)
    assert test_l2 < test_l2_sgd
class Model:
    def __init__(self, env):
        # fit the featurizer to data
        self.env = env
        samples = gather_samples(env)
        self.featurizer = RBFSampler()
        self.featurizer.fit(samples)
        dims = self.featurizer.n_components

        # initialize linear model weights
        self.w = np.zeros(dims)

    def predict(self, s, a):
        sa = np.concatenate((s, [a]))
        x = self.featurizer.transform([sa])[0]
        return x @ self.w

    def predict_all_actions(self, s):
        return [self.predict(s, a) for a in range(self.env.action_space.n)]

    def grad(self, s, a):
        sa = np.concatenate((s, [a]))
        x = self.featurizer.transform([sa])[0]
        return x
def svm(data, log_C, log_gamma):
    lb = data.lb
    train_y = lb.inverse_transform(data.train_y)
    test_y = lb.inverse_transform(data.test_y)
    print('Running SVM')
    C = np.exp(log_C)
    gamma = np.exp(log_gamma)
    print('Training with C:{}, gamma:{}'.format(C, gamma))
    rbf_feature = RBFSampler(gamma=gamma, n_components=50, random_state=0)
    trans_tr_x = rbf_feature.fit_transform(data.train_x)
    trans_test_x = rbf_feature.transform(data.test_x)
    clf = LinearSVC(C=C)
    clf = CalibratedClassifierCV(clf)
    clf.fit(trans_tr_x, train_y)
    return clf.score(trans_test_x, test_y)
class Model:
    def __init__(self, grid):
        # fit the featurizer to data
        samples = gather_samples(grid)
        # self.featurizer = Nystroem()
        self.featurizer = RBFSampler()
        self.featurizer.fit(samples)
        dims = self.featurizer.n_components

        # initialize linear model weights
        self.w = np.zeros(dims)

    def predict(self, s, a):
        sa = merge_state_action(s, a)
        x = self.featurizer.transform([sa])[0]
        return x @ self.w

    def predict_all_actions(self, s):
        return [self.predict(s, a) for a in ALL_POSSIBLE_ACTIONS]

    def grad(self, s, a):
        sa = merge_state_action(s, a)
        x = self.featurizer.transform([sa])[0]
        return x
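# Hedged sketch of the semi-gradient Q-learning update these Model classes are
# built for (grid, gather_samples, merge_state_action, ALL_POSSIBLE_ACTIONS and
# the episode variables s, a, r, s2, gamma, alpha come from surrounding code):
model = Model(grid)
target = r + gamma * max(model.predict_all_actions(s2))
td_error = target - model.predict(s, a)
model.w = model.w + alpha * td_error * model.grad(s, a)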
def test_rbf_sampler():
    # test that RBFSampler approximates kernel on random data
    # compute exact kernel
    gamma = 10.
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000,
                               random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert_less_equal(np.abs(np.mean(error)), 0.01)  # close to unbiased
    np.abs(error, out=error)
    assert_less_equal(np.max(error), 0.1)  # nothing too far off
    assert_less_equal(np.mean(error), 0.05)  # mean is fairly close
def dim_ridge(data, log_alpha, log_bw1, log_bw2, log_bw3, log_bw4,
              log_bw5, log_bw6):
    alpha = np.exp(log_alpha)
    # per-dimension bandwidths
    bw = np.exp(np.array([log_bw1, log_bw2, log_bw3, log_bw4,
                          log_bw5, log_bw6]))
    print('Training with alpha:{}, bw:{}'.format(alpha, bw))
    rbf_feature = RBFSampler(gamma=0.5, n_components=200, random_state=0)
    trans_tr_x = rbf_feature.fit_transform(np.divide(data.train_x, bw))
    trans_test_x = rbf_feature.transform(np.divide(data.test_x, bw))
    clf = Ridge(alpha=alpha)
    clf.fit(trans_tr_x, data.train_y)
    score = clf.score(trans_test_x, data.test_y)
    return max(score, -1.0)
def logistic(data, log_C, log_gamma):
    lb = data.lb
    train_y = lb.inverse_transform(data.train_y)
    test_y = lb.inverse_transform(data.test_y)
    print('Running Logistic Regression')
    C = np.exp(log_C)
    gamma = np.exp(log_gamma)
    print('Training with C:{}, gamma:{}'.format(C, gamma))
    rbf_feature = RBFSampler(gamma=gamma, n_components=200, random_state=0)
    trans_tr_x = rbf_feature.fit_transform(data.train_x)
    trans_test_x = rbf_feature.transform(data.test_x)
    clf = LogisticRegression(random_state=0, solver='lbfgs',
                             multi_class='multinomial', C=C)
    clf.fit(trans_tr_x, train_y)
    te_predict = clf.predict_proba(trans_test_x)
    return roc_auc_score(data.test_y, te_predict)
class ValueFunction(object):
    """Value function approximator."""

    def __init__(self):
        # sample environment states in order to fit the featurizer
        state_samples = np.array(
            [env.observation_space.sample() for x in range(10000)])

        # standardize features by removing the mean and scaling to unit
        # variance
        self.scaler = StandardScaler()
        self.scaler.fit(state_samples)
        scaler_samples = self.scaler.transform(state_samples)

        # approximate the feature map of an RBF kernel by Monte Carlo
        # approximation of its Fourier transform
        self.featurizer_state = RBFSampler(gamma=0.5, n_components=100)
        self.featurizer_state.fit(scaler_samples)

        # one SGD-trained linear regressor per action
        self.action_models = []
        nA = env.action_space.n
        for na in range(nA):
            model = SGDRegressor(learning_rate="constant")
            model.partial_fit([self.__featurize_state(env.reset())], [0])
            self.action_models.append(model)

    def __featurize_state(self, state):
        scaler_state = self.scaler.transform([state])
        return self.featurizer_state.transform(scaler_state)[0]

    def predict(self, state):
        curr_features = self.__featurize_state(state)
        action_probs = np.array(
            [m.predict([curr_features])[0] for m in self.action_models])
        return action_probs

    def update(self, state, action, y):
        curr_features = self.__featurize_state(state)
        self.action_models[action].partial_fit([curr_features], [y])
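# Minimal interaction sketch (assumes the same global gym-style `env` the
# class itself relies on):
vf = ValueFunction()
state = env.reset()
q_values = vf.predict(state)            # one estimate per discrete action
action = int(np.argmax(q_values))
next_state, reward, done, _ = env.step(action)
# one-step TD target with discount 0.99 (an arbitrary choice here)
vf.update(state, action, reward + 0.99 * np.max(vf.predict(next_state)))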
def train_models(X_train, y_train, X_test, y_test):
    clf = linear_model.SGDClassifier(penalty='elasticnet')
    print clf
    print "fitting a linear elasticnet (L1+L2 regularized linear classif.) with SGD"
    clf = clf.fit(X_train, y_train)
    print "score on the training set", clf.score(X_train, y_train)
    print "score on 80/20 split", clf.score(X_test, y_test)

    rbf_feature = RBFSampler(gamma=1, random_state=1)
    X_train_feats = rbf_feature.fit_transform(X_train)
    X_test_feats = rbf_feature.transform(X_test)
    print "fitting a linear elasticnet with SGD on RBF sampled features"
    clf = clf.fit(X_train_feats, y_train)
    print "score on the training set", clf.score(X_train_feats, y_train)
    print "score on 80/20 split", clf.score(X_test_feats, y_test)

    clf2 = RandomForestClassifier(max_depth=None, min_samples_split=3)
    print clf2
    print "fitting a random forest"
    clf2 = clf2.fit(X_train, y_train)
    print "score on the training set", clf2.score(X_train, y_train)
    print "score on 80/20 split", clf2.score(X_test, y_test)

    clf3 = svm.SVC(kernel='linear')
    print clf3
    print "fitting an SVM with a linear kernel"
    clf3 = clf3.fit(X_train, y_train)
    print "score on the training set", clf3.score(X_train, y_train)
    print "score on 80/20 split", clf3.score(X_test, y_test)

    clf4 = svm.SVC(kernel='rbf')
    print clf4
    print "fitting an SVM with an RBF-kernel"
    clf4 = clf4.fit(X_train, y_train)
    print "score on the training set", clf4.score(X_train, y_train)
    print "score on 80/20 split", clf4.score(X_test, y_test)

    clf5 = linear_model.LogisticRegression(penalty='l1', tol=0.01)
    print clf5
    print "fitting a logistic regression reg. with L1"
    clf5 = clf5.fit(X_train, y_train)
    print "score on the training set", clf5.score(X_train, y_train)
    print "score on 80/20 split", clf5.score(X_test, y_test)
def rbf_map(X_train=X_train_red, X_test=X_test_red, gamma=0.2,
            rbfsampler=True, n_components=100, scale=False):
    if rbfsampler:
        feature_map = RBFSampler(gamma=gamma, random_state=8,
                                 n_components=n_components)
    else:
        feature_map = Nystroem(gamma=gamma, random_state=8,
                               n_components=n_components)
    X_train_mapped = feature_map.fit_transform(X_train)
    X_test_mapped = feature_map.transform(X_test)
    if scale:
        X_train_mapped, X_test_mapped = scale_data(X_train_mapped,
                                                   X_test_mapped)
    return X_train_mapped, X_test_mapped
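# Example calls (these rely on the module-level X_train_red / X_test_red
# defaults and the repo's scale_data helper):
X_tr_rbf, X_te_rbf = rbf_map(gamma=0.2, n_components=200, scale=True)
X_tr_nys, X_te_nys = rbf_map(rbfsampler=False, n_components=200)  # Nystroem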
def dim_logistic(data, log_C, log_bw1, log_bw2, log_bw3, log_bw4,
                 log_bw5, log_bw6):
    lb = data.lb
    train_y = lb.inverse_transform(data.train_y)
    test_y = lb.inverse_transform(data.test_y)
    C = np.exp(log_C)
    # per-dimension bandwidths
    bw = np.exp(np.array([log_bw1, log_bw2, log_bw3, log_bw4,
                          log_bw5, log_bw6]))
    print('Training with C:{}, bw:{}'.format(C, bw))
    rbf_feature = RBFSampler(gamma=0.5, n_components=200, random_state=0)
    trans_tr_x = rbf_feature.fit_transform(np.divide(data.train_x, bw))
    trans_test_x = rbf_feature.transform(np.divide(data.test_x, bw))
    clf = LogisticRegression(random_state=0, solver='lbfgs', C=C)
    clf.fit(trans_tr_x, train_y)
    te_predict = clf.predict_proba(trans_test_x)
    return roc_auc_score(data.test_y, te_predict)
def test_feature_map_equals_scikit_learn():
    sigma = 2.
    gamma = sigma**2
    N = 10
    D = 20
    m = 3
    X = np.random.randn(N, D)

    np.random.seed(1)
    omega = sigma * np.random.randn(D, m)
    u = np.random.uniform(0, 2 * np.pi, m)

    # make sure basis is the same
    np.random.seed(1)
    rbf_sampler = RBFSampler(gamma, m, random_state=1)
    rbf_sampler.fit(X)
    assert_allclose(rbf_sampler.random_weights_, omega)
    assert_allclose(rbf_sampler.random_offset_, u)

    phi_scikit = rbf_sampler.transform(X)
    phi_mine = feature_map(X, omega, u)
    assert_allclose(phi_scikit, phi_mine)
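# For reference, the hand-rolled feature_map the test compares against is
# presumably the standard random Fourier feature map (an assumption; the test
# imports it from elsewhere in its repo):
def feature_map(X, omega, u):
    # phi(x) = sqrt(2/m) * cos(x . omega + u), with random frequencies omega
    return np.sqrt(2. / omega.shape[1]) * np.cos(X.dot(omega) + u)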
def estimator_separate_train_test(train, test):
    X, Y = get_X_Y_from_csv(train)
    n = X.shape[0]
    Xtest, Ytest = get_X_Y_from_csv(test)
    rbf_feature = RBFSampler()
    X_features = rbf_feature.fit_transform(X)
    clf = SGDClassifier(class_weight="balanced")
    clf.fit(X_features, Y)
    Ypredict = clf.predict(X_features)
    Xtest_features = rbf_feature.transform(Xtest)
    Ytest_predict = clf.predict(Xtest_features)
    print('score on training and test sets: {:.2f} {:.2f}'.format(
        clf.score(X_features, Y), clf.score(Xtest_features, Ytest)))
    print('f1 score on training and test sets:{:.2f} {:.2f}'.format(
        metrics.f1_score(Y, Ypredict),
        metrics.f1_score(Ytest, Ytest_predict)))
    print('Precision on training and test sets:{:.2f} {:.2f}'.format(
        metrics.precision_score(Y, Ypredict),
        metrics.precision_score(Ytest, Ytest_predict)))
    print('Recall on training and test sets: {:.2f} {:.2f}'.format(
        metrics.recall_score(Y, Ypredict),
        metrics.recall_score(Ytest, Ytest_predict)))
class LinearRBF(Policy):
    '''RBF features'''

    def __init__(self, state_dim, action_dim, number_of_features):
        Policy.__init__(self, state_dim, action_dim)
        self.rbf_feature = RBFSampler(gamma=25.,
                                      n_components=number_of_features)
        self.rbf_feature.fit(np.random.randn(action_dim, state_dim))

    def set_theta(self, theta):
        self.theta = theta

    def get_action(self, state):
        features = self.rbf_feature.transform(state.reshape(1, -1))
        action = features @ self.theta[:-self.action_dim].reshape(
            -1, self.action_dim)
        action = action + self.theta[-self.action_dim:]
        return action.reshape(-1)

    def get_number_of_parameters(self):
        return self.rbf_feature.get_params().get(
            "n_components") * self.action_dim + self.action_dim
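# Rollout sketch for LinearRBF (the Policy base class is assumed to come from
# the same repo; parameter values are illustrative):
pi = LinearRBF(state_dim=4, action_dim=2, number_of_features=64)
pi.set_theta(np.random.randn(pi.get_number_of_parameters()))
print(pi.get_action(np.zeros(4)))  # a 2-dimensional continuous action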
class CCNNLayer:
    def __init__(self, name: str, input_size: int, filter_size: int,
                 gamma: float, m: int, R: float, r: int, lr: float):
        self.name = name
        self.input_size = input_size
        self.filter_size = filter_size
        self.patch_size = filter_size ** 2
        self.output_size = self.input_size - self.filter_size + 1
        self.n_patchs = self.output_size ** 2
        self.m = m
        self.R = R
        self.lr = lr
        self.rbf_feature = RBFSampler(gamma=gamma, n_components=m,
                                      random_state=1)
        self.svd = TruncatedSVD(n_components=r)

    def initPars(self, n_classes: int, batch_size: int):
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.lr /= batch_size
        self.A = np.random.normal(0, 0.1,
                                  size=(n_classes, self.n_patchs, self.m))

    def getZMatrix(self, X):
        """
        Input: (n_instances, n_channels, input_size, input_size)
        Output: (n_instances, n_patchs, m)
        """
        Z = view_as_windows(X, (1, X.shape[1], self.filter_size,
                                self.filter_size))
        Z = Z.reshape(np.prod(Z.shape[:4]), np.prod(Z.shape[4:]))
        Q = self.rbf_feature.transform(Z).astype(np.float16)
        return Q.reshape(X.shape[0], self.n_patchs, -1)

    def predict(self, X, transform: bool = False):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        Transformed input: (batch_size, n_patchs, m)
        Output: (batch_size, n_classes)
        """
        Z = self.getZMatrix(X) if transform else X
        p = np.exp(np.tensordot(Z, self.A, axes=[(1, 2), (1, 2)]))
        return (p.T / np.sum(p, axis=1)).T

    def fit(self, X, ylabel, n_epoch: int):
        assert X.shape[2] == X.shape[3] == self.input_size
        n = X.shape[0]
        self.rbf_feature.fit(
            np.zeros((1, X.shape[1] * self.filter_size ** 2)))
        print("Preparing patches...")
        Z_batches = [self.getZMatrix(X[i: i + self.batch_size])
                     for i in range(0, n, self.batch_size)]
        y_batches = ylabel.reshape(-1, self.batch_size)
        print("Starting PSGD...")
        loss = np.inf
        rhat = self.m
        for epoch in range(n_epoch):
            print("{0}: Epoch {1}: loss = {2}, r_hat = {3}".format(
                self.name, epoch + 1, loss / n, rhat))
            loss = 0
            for i, (Z_batch, y_batch) in enumerate(zip(Z_batches,
                                                       y_batches)):
                p_batch = self.predict(Z_batch)
                loss += np.sum(
                    -np.log(p_batch[np.arange(self.batch_size), y_batch]))
                dL_batch = -p_batch
                dL_batch[np.arange(self.batch_size), y_batch] += 1
                self.A += self.lr * np.tensordot(dL_batch, Z_batch,
                                                 axes=[0, 0])
            # project A back onto the nuclear-norm ball of radius R
            A_unfold = self.A.reshape(-1, self.A.shape[2]).T
            U = self.svd.fit_transform(A_unfold)
            self.U = U.copy()
            d = np.linalg.norm(U, axis=0)
            U *= 1 / d
            d_cum = np.cumsum(d)
            rhat = np.searchsorted(
                d_cum - self.R > np.append(d[1:] * np.arange(1, d.size), 0),
                True) + 1
            if rhat >= d.size:
                print("Warning: Hard-thresholding applied")
            if rhat <= d.size:
                scale = np.maximum(0, d - (d_cum[rhat - 1] - self.R) / rhat)
                U = U[:, :rhat]
                d = d[:rhat]
                self.U = U * scale[:rhat]
                self.A = ((self.U * (1 / d)) @ (U.T @ A_unfold)).T.reshape(
                    *self.A.shape)
        Z_batches = None
        y_batches = None

    def transform(self, X):
        """
        Input: (batch_size, n_channels, input_size, input_size)
        Output: (batch_size, n_output_channels, output_size, output_size)
        """
        Z = np.rollaxis(
            np.tensordot(self.U, self.getZMatrix(X), axes=[0, 2]), 0, 2)
        return Z.reshape(Z.shape[0], Z.shape[1],
                         self.output_size, self.output_size)
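# Hedged usage sketch for CCNNLayer on MNIST-shaped inputs (X: float array of
# shape (n, 1, 28, 28), y: int labels of shape (n,), n divisible by
# batch_size; the hyperparameter values below are illustrative only):
layer = CCNNLayer("conv1", input_size=28, filter_size=5, gamma=0.5,
                  m=256, R=10., r=16, lr=0.1)
layer.initPars(n_classes=10, batch_size=50)
layer.fit(X, y, n_epoch=5)
features = layer.transform(X[:50])  # (50, r_hat, 24, 24)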
    # (tail of the preceding Nystroem loop)
    XtestT = kpls.transform(ktest)
    if n == 573:
        kplsScoresNys[:, 0] = util.classify(XtrainT, XtestT,
                                            labelsTrain, labelsTest)
    elif n == 1073:
        kplsScoresNys[:, 1] = util.classify(XtrainT, XtestT,
                                            labelsTrain, labelsTest)
    elif n == 1573:
        kplsScoresNys[:, 2] = util.classify(XtrainT, XtestT,
                                            labelsTrain, labelsTest)

# RBF sampler method
elapTimeRBFS = np.zeros(np.shape(nComponents))
kplsScoresRBFS = np.zeros((2, 3))
for i, n in enumerate(nComponents):
    rbfs = RBFSampler(n_components=n, gamma=gamma)
    rbfs.fit(Xtrain)
    ktrain = rbfs.transform(Xtrain)
    ktest = rbfs.transform(Xtest)
    startTime = timeit.default_timer()
    kpls.fit(ktrain, Ytrain)
    elapTimeRBFS[i] = timeit.default_timer() - startTime
    XtrainT = kpls.transform(ktrain)
    XtestT = kpls.transform(ktest)
    if n == 573:
        kplsScoresRBFS[:, 0] = util.classify(XtrainT, XtestT,
                                             labelsTrain, labelsTest)
    elif n == 1073:
        kplsScoresRBFS[:, 1] = util.classify(XtrainT, XtestT,
                                             labelsTrain, labelsTest)
    elif n == 1573:
        kplsScoresRBFS[:, 2] = util.classify(XtrainT, XtestT,
                                             labelsTrain, labelsTest)

#%% Plot figures
class DecomposableKernel(object):
    r"""
    Decomposable Operator-Valued Kernel of the form:

    .. math::
        X, Y \mapsto K(X, Y) = k_s(X, Y) A

    where A is a symmetric positive semidefinite operator acting on the
    outputs.

    Attributes
    ----------
    A : {array, LinearOperator}, shape = [n_targets, n_targets]
        Linear operator acting on the outputs

    scalar_kernel : {callable}
        Callable which associate to the training points X the Gram matrix.

    scalar_kernel_params : {mapping of string to any}
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    See also
    --------
    DecomposableKernelMap
        Decomposable Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 10)
    >>> K = ovk.DecomposableKernel(np.eye(2))
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, A, scalar_kernel=rbf_kernel,
                 scalar_kernel_params=None):
        """Initialize the Decomposable Operator-Valued Kernel.

        Parameters
        ----------
        A : {array, LinearOperator}, shape = [n_targets, n_targets]
            Linear operator acting on the outputs

        scalar_kernel : {callable}
            Callable which associate to the training points X the Gram
            matrix.

        scalar_kernel_params : {mapping of string to any}, optional
            Additional parameters (keyword arguments) for kernel function
            passed as callable object.
        """
        self.A = A
        self.scalar_kernel = scalar_kernel
        self.scalar_kernel_params = scalar_kernel_params
        self.p = A.shape[0]

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
            K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable

        .. math::
            K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import DecomposableKernelMap
        return DecomposableKernelMap(X, self.A,
                                     self.scalar_kernel,
                                     self.scalar_kernel_params)

    def get_orff_map(self, X, D=100, eps=1e-5, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
            K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        u, s, v = svd(self.A, full_matrices=False, compute_uv=True)
        self.B_ = dot(diag(sqrt(s[s > eps])), v[s > eps, :])
        self.r = self.B_.shape[0]
        if (self.scalar_kernel is rbf_kernel) and not hasattr(self, 'Xb_'):
            if self.scalar_kernel_params is None:
                gamma = 1.
            else:
                gamma = self.scalar_kernel_params['gamma']
            self.phi_ = RBFSampler(gamma=gamma, n_components=D,
                                   random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif (self.scalar_kernel == 'skewed_chi2') and not hasattr(self,
                                                                   'Xb_'):
            if self.scalar_kernel_params is None:
                skew = 1.
            else:
                skew = self.scalar_kernel_params['skew']
            self.phi_ = SkewedChi2Sampler(skewedness=skew,
                                          n_components=D,
                                          random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X).astype(X.dtype)
        elif not hasattr(self, 'Xb_'):
            raise NotImplementedError('ORFF map for kernel is not '
                                      'implemented yet')

        D = self.phi_.n_components
        if X is self.Xb_:
            cshape = (D, self.r)
            rshape = (self.Xb_.shape[0], self.p)
            oshape = (self.Xb_.shape[0] * self.p, D * self.r)
            return LinearOperator(
                oshape, dtype=self.Xb_.dtype,
                matvec=lambda b: dot(dot(self.Xb_, b.reshape(cshape)),
                                     self.B_),
                rmatvec=lambda r: dot(self.Xb_.T, dot(r.reshape(rshape),
                                                      self.B_.T)))
        else:
            Xb = self.phi_.transform(X)
            cshape = (D, self.r)
            rshape = (X.shape[0], self.p)
            oshape = (Xb.shape[0] * self.p, D * self.r)
            return LinearOperator(
                oshape, dtype=self.Xb_.dtype,
                matvec=lambda b: dot(dot(Xb, b.reshape(cshape)), self.B_),
                rmatvec=lambda r: dot(Xb.T, dot(r.reshape(rshape),
                                                self.B_.T)))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
            K_x: \begin{cases}
                 Y \mapsto K(X, Y) \enskip\text{if } Y \text{ is None,} \\
                 K(X, Y) \enskip\text{otherwise.}
                 \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
            default = None
            Samples.

        Returns
        -------
        K_x : DecomposableKernelMap, callable or LinearOperator
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)
class RBFDivFreeKernel(object):
    r"""
    Divergence-free Operator-Valued Kernel of the form:

    .. math::
        X \mapsto K_X(Y) = \exp(-\gamma \|X - Y\|^2) A_{X,Y},

    where,

    .. math::
        A_{X,Y} = 2 \gamma (X - Y)(X - Y)^T +
                  \left((d - 1) - 2 \gamma \|X - Y\|^2\right) I.

    Attributes
    ----------
    gamma : {float}
        RBF kernel parameter.

    See also
    --------
    RBFDivFreeKernelMap
        Divergence-free Kernel map

    Examples
    --------
    >>> import operalib as ovk
    >>> import numpy as np
    >>> X = np.random.randn(100, 2)
    >>> K = ovk.RBFDivFreeKernel(1.)
    >>> # The kernel matrix as a linear operator
    >>> K(X, X)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    <200x200 _CustomLinearOperator with dtype=float64>
    """

    def __init__(self, gamma):
        """Initialize the Divergence-free Operator-Valued Kernel.

        Parameters
        ----------
        gamma : {float}
            RBF kernel parameter.
        """
        self.gamma = gamma

    def get_kernel_map(self, X):
        r"""Return the kernel map associated with the data X.

        .. math::
            K_x: Y \mapsto K(X, Y)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        K_x : RBFDivFreeKernelMap, callable

        .. math::
            K_x: Y \mapsto K(X, Y).
        """
        from .kernel_maps import RBFDivFreeKernelMap
        return RBFDivFreeKernelMap(X, self.gamma)

    def get_orff_map(self, X, D=100, random_state=0):
        r"""Return the Random Fourier Feature map associated with the data X.

        .. math::
            K_x: Y \mapsto \tilde{\Phi}(X)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        \tilde{\Phi}(X) : Linear Operator, callable
        """
        self.r = 1
        if not hasattr(self, 'Xb_'):
            self.phi_ = RBFSampler(gamma=self.gamma, n_components=D,
                                   random_state=random_state)
            self.phi_.fit(X)
            self.Xb_ = self.phi_.transform(X)
            self.Xb_ = (self.Xb_.reshape((self.Xb_.shape[0], 1,
                                          self.Xb_.shape[1])) *
                        self.phi_.random_weights_.reshape(
                            (1, -1, self.Xb_.shape[1])))
            self.Xb_ = self.Xb_.reshape((-1, self.Xb_.shape[2]))

        D = self.phi_.n_components
        if X is self.Xb_:
            return LinearOperator(self.Xb_.shape,
                                  matvec=lambda b: dot(self.Xb_, b),
                                  rmatvec=lambda r: dot(self.Xb_.T, r))
        else:
            Xb = self.phi_.transform(X)
            # TODO (unfinished in the original): expand Xb to 3D with the
            # random weights before the reshape below, e.g.
            # w = self.phi_.random_weights_.reshape((1, -1, Xb.shape[1]))
            # wn = np.linalg.norm(w)
            # Xb = (Xb.reshape((Xb.shape[0], 1, Xb.shape[1])) *
            #       wn * np.dot(w.T, w) / wn)
            Xb = Xb.reshape((-1, Xb.shape[2]))
            return LinearOperator(Xb.shape,
                                  matvec=lambda b: dot(Xb, b),
                                  rmatvec=lambda r: dot(Xb.T, r))

    def __call__(self, X, Y=None):
        r"""Return the kernel map associated with the data X.

        .. math::
            K_x: \begin{cases}
                 Y \mapsto K(X, Y) \enskip\text{if } Y \text{ is None,} \\
                 K(X, Y) \enskip\text{otherwise.}
                 \end{cases}

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples1, n_features]
            Samples.

        Y : {array-like, sparse matrix}, shape = [n_samples2, n_features],
            default = None
            Samples.

        Returns
        -------
        K_x : RBFDivFreeKernelMap, callable or LinearOperator
        """
        Kmap = self.get_kernel_map(X)
        if Y is None:
            return Kmap
        else:
            return Kmap(Y)