def example(num_samples=10, num_features=2, grid_size=20, filename="svm.pdf"):
    samples = np.array(
        np.random.normal(size=num_samples * num_features).reshape(
            num_samples, num_features))
    labels = 2 * (samples.sum(axis=1) > 0) - 1.0
    data_dict = build_data_dict(samples, labels)
    svm = SVM(data=data_dict, kernel=Kernel.linear(), c=0.1)
    svm.fit()
    plot(svm, samples, labels, grid_size, filename)
def custom_SVM_test_function(X_train, y_train, X_test):
    data_dict = dataset_reader.build_data_dict(X_train, y_train)
    svm = custom_svm.SVM(data=data_dict, kernel=Kernel.linear(), c=1)
    svm.fit()
    y_pred = []
    for x in X_test:
        y_pred.append(svm.predict(x))
    return y_pred
def train_eval(config, args, X_train, Y_train, X_test=None, Y_test=None):
    seed = str(args['seed']) if not args['split_ready'] else ''
    model_path = "%s%s_%s.pkl" % (args['model_path'], args['dataset'], seed)
    ker = Kernel(config, args['kernel_type'])
    logging.info('Training on dataset %s...' % args['dataset'])
    logging.info('\tComputing %s kernel.' % args['kernel_type'])
    K_train = ker.fit_transform(X_train)
    lins = []
    nans = []
    for col in range(Y_train.shape[1]):
        Y_train_all = Y_train[:, col]
        # Restrict the Gram matrix to the rows/columns with labels for this task
        K_train_notnan = K_train[~np.isnan(Y_train_all)][:, ~np.isnan(Y_train_all)]
        Y_train_notnan = Y_train_all[~np.isnan(Y_train_all)]
        nans.append(np.isnan(Y_train_all))
        if args['metric'] in ['ROC', 'PRC']:
            logging.info('\tTraining classifier on task %d.' % (col + 1))
            lin = svm.SVC(kernel='precomputed', C=10, probability=True)
            lin.fit(K_train_notnan, Y_train_notnan)
        else:
            logging.info('\tTraining regressor on task %d.' % (col + 1))
            lin = svm.SVR(kernel='precomputed', C=10)
            lin.fit(K_train_notnan, Y_train_notnan)
        lins.append(lin)
    model = {'kernel': ker, 'linear': lins, 'nans': nans}
    save_model(model, model_path)
    logging.info('\tTrained model saved to "%s".' % (model_path.split('/')[-1]))
    if X_test is not None and Y_test is not None:
        score = evaluate(args, X_test, Y_test)
        logging.info('\tAll tasks averaged score (%s): %.6f.' %
                     (args['metric'], score))
        return score
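# The evaluate() helper called above is not shown. With a precomputed kernel,
# prediction needs the test-vs-train Gram matrix. A minimal sketch, assuming
# ker.transform(X_test) mirrors ker.fit_transform(X_train) and returns an
# (n_test, n_train) matrix; the per-task metric below is a placeholder:
def evaluate_sketch(model, X_test, Y_test):
    ker, lins, nans = model['kernel'], model['linear'], model['nans']
    K_test = ker.transform(X_test)            # assumed API: (n_test, n_train)
    scores = []
    for col, lin in enumerate(lins):
        K_col = K_test[:, ~nans[col]]         # keep the train columns used in fit
        y_col = Y_test[:, col]
        mask = ~np.isnan(y_col)
        pred = lin.predict(K_col[mask])
        scores.append(np.mean(pred == y_col[mask]))  # placeholder accuracy
    return np.mean(scores)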
def plot_results(time_points, values):
    axis_x = np.arange(0, 5.1, 0.1)
    fig = plt.figure(0)
    plt.axis([0, 5, -2, 2], facecolor='g')
    plt.grid(color='w', linestyle='-', linewidth=0.5)
    ax = fig.add_subplot(111)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.patch.set_facecolor('#E8E8F1')
    # Posterior mean and variance at each grid point
    mu = np.zeros(axis_x.size)
    var = np.zeros(axis_x.size)
    ker = Kernel()
    ker.SE(1, 1)
    gp = GP()
    for i in range(axis_x.size):
        mu[i], var[i], _ = gp.GPR(time_points=time_points, values=values,
                                  predict_point=axis_x[i], kernel=ker)
    print(mu)
    # Shade the band mu +/- var around the mean
    plt.fill_between(axis_x, mu + var, mu - var, color='#D1D9F0')
    # Plot the posterior mean
    plt.plot(axis_x, mu, linewidth=2, color="#5B8CEB")
    # Plot the observed points
    plt.scatter(time_points, values, color='#598BEB')
    plt.show()
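# For reference, the squared-exponential kernel configured by ker.SE(1, 1) is
# conventionally k(x, x') = sigma^2 * exp(-(x - x')^2 / (2 * l^2)). A minimal
# standalone version; the (sigma, l) parameter order is an assumption:
def squared_exponential(x1, x2, sigma=1.0, length=1.0):
    return sigma ** 2 * np.exp(-(x1 - x2) ** 2 / (2.0 * length ** 2))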
def example(grid_size=30):
    samples, labels = generate_data()
    # samples = np.matrix(np.random.normal(size=num_samples * num_features).reshape(num_samples, num_features))
    # labels = 2 * (samples.sum(axis=1) > 0) - 1.0
    shuffled_data = np.concatenate((samples, labels), axis=1)
    np.random.shuffle(shuffled_data)
    f1_scores = []
    parts = 5
    trainer = SVMTrainer(Kernel.gaussian_trans(0.5), 0.1)
    # 5-fold cross-validation
    for i in range(parts):
        train_data, train_labels, test_data, test_labels = split_data(
            shuffled_data, parts, i)
        predictor = trainer.train(train_data, train_labels)
        f1 = f_measure(test_labels, predictor, test_data)
        print(f1)
        f1_scores.append(f1)
    print(np.average(np.array(f1_scores)))
    predictor = trainer.train(samples, labels)
    plot(predictor, samples, labels, grid_size)
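# f_measure() is not shown above. A minimal sketch for binary {-1, +1} labels,
# assuming the predictor exposes a per-sample predict(x) method as used below:
def f_measure_sketch(test_labels, predictor, test_data):
    preds = np.array([predictor.predict(x) for x in test_data])
    labels = np.asarray(test_labels).ravel()
    tp = np.sum((preds == 1) & (labels == 1))
    fp = np.sum((preds == 1) & (labels == -1))
    fn = np.sum((preds == -1) & (labels == 1))
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)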
class TestKernel(unittest.TestCase):

    def setUp(self):
        self.python = Kernel('python', os.path.join(PATH, 'pykernel.py'))
        self.ruby = Kernel('ruby', os.path.join(PATH, 'rubykernel.rb'))
        self.kernels = [self.python, self.ruby]

    def tearDown(self):
        for kernel in self.kernels:
            kernel.terminate()

    def test_encode_message(self):
        message = 'a\nb c'
        for kernel in self.kernels:
            encoded = kernel._encode(message).decode(kernel._ENCODING)
            self.assertEqual(encoded, u'a\uffffb c')

    def test_decode_message(self):
        message = u'while 1:\n pass'
        for kernel in self.kernels:
            encoded = message.encode(kernel._ENCODING)
            decoded = kernel._decode(encoded)
            self.assertEqual(decoded, message)

    def test_eval_python(self):
        self.python.send('print range(5)')
        result = self.python.recv()
        self.assertEqual(result, '[0, 1, 2, 3, 4]\n')

    def test_eval_ruby(self):
        self.ruby.send('puts 2 + 3')
        result = self.ruby.recv()
        self.assertEqual(result, '5\n')

    def test_eval_code(self):
        for kernel in self.kernels:
            result = kernel.eval_code('print 5+3')
            self.assertEqual(result, 8)
class SVM: """ Binary Support Vector Classification Parameters ---------- kernel_type : string, optional (default = "rbf") - Kernel to be used to transform data C : float, optional (default = 1) - Coefficient of error term in Soft Margin Obj Function gamma : float, optional (default = 1) - Paramter in RBF and Sigmoid Kernel Functions degree : float, optional (default = 3) - Degree of Polynomial in Polynomial Kernel Function tolerance : float, optional (default = 1e-4) - tolerance for stopping criteria epsilon : float, optional (defualt = 1e-4) - UPDATE AFTER UNDERSTANDING max_iter : int - The maximum number of iterations of SMO. solver : string, optional (default = "smo") - Which optimization algorithm to use for the dual form of the Obj """ def __init__(self, kernel_type='rbf', C=1, gamma=1, degree=3, tolerance=0.1, epsilon=0.1, max_iter=100, solver="smo"): self.__kernel = Kernel(kernel_type, gamma, degree) self.__C = C self.__tol = tolerance self.__error_cache = {} self.__eps = epsilon self.__max_iter = max_iter self.__solver = solver def __del__(self): pass def fit(self, x, y): """ Parameters ---------- x : ndarray - Data y : ndarray - Labels Actions ------- Creates the SVM model over x and y - Calls SMO to solve for alphas and b - Sets the support vectors """ self.__xs = x self.__ys = y self.__size = x.shape[0] self.__kernel_mat = self.__get_kernel_matrix() # Get the alphas and b from smo self.__alphas, self.__b = self.__solve(self.__solver) supp_idxs = np.nonzero(self.__alphas)[0] self.__supp_x = np.take(self.__xs, supp_idxs, axis=0) self.__supp_y = np.take(self.__ys, supp_idxs) self.__supp_a = np.take(self.__alphas, supp_idxs) def __get_kernel_matrix(self): """ Actions: ------- Compute and store the kernel matrix KM over the input data """ self.__KM = np.zeros((self.__size, self.__size)) for i in range(self.__size): for j in range(self.__size): self.__KM[i, j] = self.__kernel.eval(self.__xs[i], self.__xs[j]) def __initialize_error_cache(self): """ Actions ------- Initialize the error over each training example Since the model output is initially 0 for all xi, the Error is 0 - yi = -yi """ for i in range(self.__size): self.__error_cache[i] = -self.__ys[i] def __solve(self, solver): """ Parameters ---------- solver : string - The quadratic optimization algorithm to use Actions: -------- Calls the appropriate optimizer function """ if (solver == "smo"): return self.__smo() def __get_bounds(self, s, i1, i2): """ Parameters ---------- s : int - 1 if x[i1] and x[i2] have same label, -1 otherwise i1 : int - Index of second chosen Lagrange multiplier i2 : int - Index of first chosen Lagrange multiplier Returns: -------- The minimum and maximum values that the new alpha2 can take """ return L, H def __optimize_and_clip(self, a2_old, y2, E1, E2, eta, L, H): """ Parameters ---------- a2_old : float - current values of alphas[i2] y2 : int - label of x[i2] E1 : float - Error on x[i1] E2 : float - Error on x[i2] eta : floats - Second derivative of dual objective L : float - Lower bound for new alpha2 H : float - Upper bound for new alpha2 Returns: -------- a2 : float - The new optimal, clipped lagrangian multiplier """ a2 = a2_old + ((y2 * (E1 - E2)) / eta) if (a2 < L): a2 = L elif (a2 > H): a2 = H return a2 def __take_step(self, i1, i2, E2): """ Parameters ---------- i1 : int - Index of second chosen Lagrange multiplier i2 : int - Index of second first Lagrange multiplier E2 : float - Error on the first chosen Lagrange multiplier Actions ------- Update alphas by optimizing 2 Find optimal value 
for 1 alpha, clip so constraints are not violated, then solve for Returns ------- """ # Same alphas or invalid alpha if (i1 == i2 or i1 == -1): return 0 # old alphas alph1 = self.__alphas[i1] alph2 = self.__alphas[i2] # ys y1 = self.__ys[i1] y2 = self.__ys[i2] # Error on alph1 E1 = self.__error_cache[i1] s = y1 * y2 # Compute Lower and Upper bounds if (s > 0): L = max(0, self.__alphas[i2] - self.__alphas[i1] - self.__C) H = min(self.__C, self.__alphas[i2] + self.__alphas[i1]) else: L = max(0, self.__alphas[i2] - self.__alphas[i1]) H = min(self.__C, self.__C + self.__alphas[i2] - self.__alphas[i1]) # No feasible alphas here if (L == H): return 0 # eta = k11 + k22 - k12 k11 = self.__KM[i1, i1] k22 = self.__KM[i2, i2] k12 = self.__KM[i1, i2] eta = k11 + k22 - (2 * k12) # Optimum is valid. Set new a2 to optimal value and then clip if (eta > 0): a2 = alph2 + ((y2 * (E1 - E2)) / eta) if (a2 < L): a2 = L elif (a2 > H): a2 = H # eta is equal to 0,so we can't get the optimum for a2 # settle for setting it to the max of the endpoints of the constraint line else: f1 = (y1 * (E1 + self.__b)) - (self.__alphas[i1] * k11) - (s * self.__alphas[i2] * k12) f2 = (y2 * (E2 + self.__b)) - (s * self.__alphas[i1] * k12) - (self.__alphas[i2] * k22) L1 = self.__alphas[i1] + (s * (self.__alphas[i2] - L)) H1 = self.__alphas[i1] + (s * (self.__alphas[i2] - H)) Lobj = L1 * f1 + L * f2 + 0.5 * pow(L1, 2) * k11 + 0.5 * pow( L, 2) * k22 + s * L * L1 * k12 Hobj = H1 * f1 + H * f2 + 0.5 * pow(H1, 2) * k11 + 0.5 * pow( H, 2) * k22 + s * H * H1 * k12 if (Lobj < Hobj - self.__eps): a2 = L elif (Lobj < Hobj + self.__eps): a2 = H else: a2 = alph2 # Not a significant change in a2 so dont change it and return 0 if (abs(a2 - alph2) < self.__eps * (a2 + alph2 + self.__eps)): return 0 # Compute a1 from box linear constraint a1 = alph1 + s * (alph2 - a2) # Make sure a1 is in feasible region if (a1 < 0): a2 += s * a1 a1 = 0 elif (a1 > self.__C): a2 += s * (a1 - self.__C) a1 = self.__C # Update threshold to reflect change in Lagrange multipliers b1 = self.__b + E1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12 b2 = self.__b + E2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22 b_new = (b1 + b2) / 2 delta_b = b_new = self.__b self.__b = b_new # Update error cache using new Lagrange multipliers t1 = y1 * (a1 - alph1) t2 = y2 * (a2 - alph2) for i in range(self.__size): if (self.__alphas[i] > 0 and self.__alphas[i] < self.__C): self.__error_cache[i] += t1 * self.__KM[ i1, i] + t2 * self.__KM[i2, i] - delta_b self.__error_cache[i1] = 0 self.__error_cache[i2] = 0 # store new alphas self.__alphas[i1] = a1 self.__alphas[i2] = a2 return 1 def __second_choice_heuristic(self, E2): """ Parameters ---------- E2 : int - Error on first selected alpha value Returns ------- i1 : int - The index of the training example that maximizes the absolute value of errors E1 and E2 """ tmax = 0 i1 = -1 for k in range(self.__size): if (self.__alphas[k] > 0 and self.__alphas[k] < self.__C): E1 = self.__error_cache[k] temp = abs(E2 - E1) if (temp > tmax): tmax = temp i1 = k return i1 def __examine_example(self, i2): """ Paramters --------- i2 : int - Index of current training example Actions: ------- Checks if alphas[i2] violates the KKT conditions If it does, use the heuristics to select the second alpha value Then optimize them jointly by calling takeStep Returns ------ int (0 or 1) - Whether or not alphas[i2] was changed """ y2 = self.__ys[i2] alph2 = self.__alphas[i2] E2 = np.sign(self.__u(idx=i2)) - y2 r2 = E2 * y2 if ((r2 < -self.__tol and alph2 < 
self.__C) or (r2 > self.__tol and alph2 > 0)): i1 = self.__second_choice_heuristic(E2) if (self.__take_step(i1, i2, E2)): return 1 idx = random.randint(0, self.__size) for i1 in list(range(idx, self.__size)) + list(range(0, idx)): if (self.__alphas[i1] > 0 and self.__alphas[i1] < self.__C): if (self.__take_step(i1, i2, E2)): return 1 for i1 in list(range(idx, self.__size)) + list(range(0, idx)): if (self.__take_step(i1, i2, E2)): return 1 return 0 def __smo(self): """ Sequential Minimal Optimization of Dual Form of Obj Function Returns ------- self.__alphas : ndarray - Optimal Lagrange Multipliers in Obj Function self.__b : float - straight line distance to optimal hyperplane """ self.__alphas = np.zeros(self.__size) self.__b = 0 # num alphas changed in 1 pass of training set num_changed = 0 # variable used to alternate between 1 pass over all # training examples and 1 pass over examples whose # Lagrange multipliers are not at their bounds examine_all = 1 # Store errors to speed up algorithm self.__initialize_error_cache() steps = 0 while (num_changed > 0 or examine_all): steps += 1 if (steps > self.__max_iter): break if (examine_all): # loop over entire training set for i in range(self.__size): num_changed += self.__examine_example(i) else: for i in range(self.__size): # loop over non boundary training examples if (self.__alphas[i] != 0 and self.__alphas[i] != self.__C): num_changed += self.__examine_example(i) if (examine_all == 1): examine_all = 0 elif (num_changed == 0): examine_all = 1 return self.__alphas, self.__b def __u(self, x=None, idx=None): """ Parameters ---------- x : ndarray, optional (default = None) One data vector idx : int, optional (default = None) If not None, x = self.xs[idx] Returns ------- u : float -The evaluation of the decision function at point x. Note: When this function is called during training, we use the precomputed kernel vector. When called during testing, we compute the kernel vector. """ if (idx != None): kernel_vector = self.__KM[idx, :] return np.sum(kernel_vector * self.__ys * self.__alphas) - self.__b else: if (self.__supp_x.shape[0] == 0): return -self.__b kernel_vector = np.apply_along_axis(self.__kernel.eval, 1, self.__supp_x, x2=x) mult = kernel_vector * self.__supp_y, *self.__supp_a return np.sum(mult[0]) - self.__b def predict(self, xs): """ Parameters ---------- xs : ndarray - Input samples to predict the labels of Returns ------- ndarray - Predicted labels of xs """ def sign(x): return np.sign(self.__u(x=x)) return np.apply_along_axis(sign, 1, xs)
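# A minimal usage sketch of the SVM class above on a separable toy problem
# (data values are illustrative only):
if __name__ == '__main__':
    X = np.array([[2.0, 2.0], [3.0, 3.0], [-2.0, -2.0], [-3.0, -3.0]])
    y = np.array([1, 1, -1, -1])
    clf = SVM(kernel_type='rbf', C=1, gamma=1)
    clf.fit(X, y)
    print(clf.predict(np.array([[2.5, 2.5], [-2.5, -2.5]])))  # expect [ 1. -1.]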
def __init__(self, max_num, domain):
    self.max_num = max_num
    rho0 = 1000.          # rest density [ kg/m^3 ]
    VF = .0262144         # simulation volume [ m^3 ]
    VP = VF / max_num     # particle volume [ m^3 ]
    m = rho0 * VP         # particle mass [ kg ]
    re = (VP) ** (1 / 3.) # particle radius [ m ]
    # re = (VP)**(1/2.)   # particle radius [ m ] (2D variant)
    print("re, m, VP", re, m, VP)
    rest_distance = .87 * re                 # rest distance between particles [ m ]
    smoothing_radius = 2.0 * rest_distance   # smoothing radius for SPH kernels
    boundary_distance = .5 * rest_distance   # for calculating collision with boundary

    # the ratio between the particle radius in simulation space and world space
    print("VF", VF)
    print("domain.V: ", domain.V)
    print("VF/domain.V", VF / domain.V)
    print("scale calc", (VF / domain.V) ** (1 / 3.))
    # print("scale calc", (VF / domain.V)**(1/2.))
    sim_scale = (VF / domain.V) ** (1 / 3.)   # [ m^3 / world m^3 ]
    # sim_scale = (VF / domain.V)**(1/2.)     # [ m^2 / world m^2 ] (2D variant)

    self.rho0 = rho0
    self.VF = VF
    self.mass = m
    self.VP = VP
    self.re = re
    self.rest_distance = rest_distance
    self.smoothing_radius = smoothing_radius
    self.boundary_distance = boundary_distance
    self.sim_scale = sim_scale

    print("=====================================================")
    print("particle mass:", self.mass)
    print("Fluid Volume VF:", self.VF)
    print("simulation scale:", self.sim_scale)
    print("smoothing radius:", self.smoothing_radius)
    print("rest distance:", self.rest_distance)
    print("=====================================================")

    # Other parameters
    self.K = 15.    # gas constant
    self.boundary_stiffness = 20000.
    self.boundary_dampening = 256.
    # friction
    self.friction_coef = 0.
    self.restitution_coef = 0.
    # not used yet
    self.shear = 0.
    self.attraction = 0.
    self.spring = 0.
    self.velocity_limit = 600.
    self.xsph_factor = .05
    self.viscosity = .01
    self.gravity = -9.8
    self.EPSILON = 10E-6

    # Domain
    self.domain = domain
    self.domain.setup(self.smoothing_radius / self.sim_scale)

    # Kernels
    self.kernels = Kernel(self.smoothing_radius)
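# The Kernel(self.smoothing_radius) class is not shown. SPH codes commonly use
# the poly6 kernel of Mueller et al. (2003) for density estimation,
# W(r, h) = 315 / (64 * pi * h^9) * (h^2 - r^2)^3 for 0 <= r <= h. A sketch
# (not necessarily the kernel this simulation uses):
import math

def poly6(r, h):
    if r < 0 or r > h:
        return 0.0
    return 315.0 / (64.0 * math.pi * h ** 9) * (h * h - r * r) ** 3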
# Load train set and embedding
path_data = 'dataset/Xtr0.csv'
path_label = 'dataset/Ytr0.csv'
train_dataset = datahandler(path_data, path_label, features_generated=False)
train_dataset.Y[train_dataset.Y == 0] = -1   # map {0, 1} labels to {-1, +1}
train_dataset.compute_vocabulary(6)
train_dataset.mismatch_embedding(6, 1, train_dataset.vocab)
X_train0, Y_train = train_dataset.X_embedded, train_dataset.Y
kernel = Kernel(Kernel.dot_product())
K0 = kernel.kernel_matrix(X_train0)

# Load test set and embedding
path_data = 'dataset/Xte0.csv'
path_label = 'dataset/Ytr0.csv'
test_dataset = datahandler(path_data, path_label, features_generated=False)
test_dataset.mismatch_embedding(6, 1, train_dataset.vocab)
X_test0 = test_dataset.X_embedded

##########################################
# KERNEL 2
##########################################
train_dataset.compute_vocabulary(7)
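# Kernel.dot_product() above suggests a linear kernel, whose Gram matrix over
# the embedded rows is simply K = X X^T. An equivalent for reference, assuming
# X is a dense 2-D array of embeddings:
def linear_gram(X):
    X = np.asarray(X, dtype=float)
    return X @ X.T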
# positives = [[4, 4], [5, 5]]
positives = [[5, 1], [6, -1], [7, 3]]
# positives = [[-0.23530383, 1.70585848],
#              [ 0.8157926 , 0.04591391],
#              [ 0.03237168, 1.36243792]]

# negatives = [[2, 1], [1, 2]]
negatives = [[1, 7], [2, 8], [3, 8]]
# negatives = [[-0.07810244, -0.65502153],
#              [ 0.25648505, -0.79438534],
#              [-0.83531028, -0.18554141],
#              [ 0.41896733, -0.73003242],
#              [-1.00007796,  0.00366544],
#              [-1.58005843,  0.83875439],
#              [ 0.77187267, -1.67242829]]

data_dict = {-1: np.array(negatives), 1: np.array(positives)}
num_samples = len(positives) + len(negatives)
samples = np.append(np.array(positives),
                    np.array(negatives)).reshape(num_samples, 2)
labels = np.append(np.ones(len(positives)), np.ones(len(negatives)) * -1)

svm = SVM(data=data_dict, kernel=Kernel.linear(), c=0.1)
svm.fit()
plot(svm, samples, labels, 20, "svm_run.pdf")
# print(svm.predict(test_data))
          'metric': 'rmse'}

bounds = [(1.0e-5, 1.0e-1),   # learning rate
          (0.5, 0.9999),      # decay of the learning rate
          (2, 1000)]          # number of leaves

n_random_trials = 3   # initiate Bayesian optimization with 3 random draws
n_searches = 10

# Use my Bayesian optimization
mdl = Model(data_mat, lags, n_oos, n_val, prediction_range,
            target_vars_inds, params)
kernel = Kernel("rbf", 1)
bo = BayesianOptimization(mdl.obj_fun, bounds, kernel,
                          expected_improvement, n_random_trials)
ind, best_para_my, y = bo.search(n_searches, 2, 25)

# Use Ax Bayesian optimization
n_random_trials = 5   # initiate Bayesian optimization with 5 random draws
n_searches = 20
mdl = Model(data_mat, lags, n_oos, n_val, prediction_range,
            target_vars_inds, params)
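# expected_improvement is passed in as the acquisition function. For
# minimization it is commonly EI(x) = (y* - mu(x)) * Phi(z) + sigma(x) * phi(z)
# with z = (y* - mu(x)) / sigma(x). A sketch; the exact signature that
# BayesianOptimization expects is an assumption:
from scipy.stats import norm

def expected_improvement_sketch(mu, sigma, y_best):
    sigma = np.maximum(sigma, 1e-12)   # guard against zero predictive std
    z = (y_best - mu) / sigma
    return (y_best - mu) * norm.cdf(z) + sigma * norm.pdf(z)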
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

import svm
from kernels import Kernel

iris = datasets.load_iris()
y = iris.target
# Relabel class 0 as -1 and keep only the two classes {-1, 1}
y = np.asarray([-1 if x == 0 else x for x in y])
sel = y != 2
y = y[sel]
X = iris.data[:, :2]
X = X[sel]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

C = 0.1
trainer = svm.SVMTrainer(kernel=Kernel.linear(), c=C)
model = trainer.train(X_train, y_train)

# Build a mesh over the feature space and predict each grid point
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
h = 0.02
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
xxyy = np.stack((xx.ravel(), yy.ravel()), axis=-1)
result = []
for i in range(len(xxyy)):
    result.append(model.predict(xxyy[i]))
Z = np.array(result).reshape(xx.shape)
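# The grid predictions in Z can then be drawn as a filled contour over the two
# iris features; a typical continuation (colormap choice is arbitrary):
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.6)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm, edgecolors='k')
plt.xlabel('sepal length (cm)')
plt.ylabel('sepal width (cm)')
plt.show()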
    plt.plot(axis_x, mu, linewidth=2, color=mean_color[i % k_color])

    # background color
    # plt.axis([0,5,-2,2], facecolor = 'g')
    # plt.grid(color='w', linestyle='-', linewidth=0.5)
    # ax.patch.set_facecolor('#E8E8F1')

    # show the points
    for i in range(len(GPs)):
        plt.scatter(GPs[i].time_points, GPs[i].values,
                    color=mean_color[i % k_color], marker='X')
    plt.show()


if __name__ == '__main__':
    X = np.array([1, 2, 3, 4]).reshape(-1, 1)
    Y = np.sin(X)  # np.random.randn(20,1)*0.05
    X = X.reshape(4, )
    Y = Y.reshape(4, )
    k1 = Kernel("SE", np.sqrt(2), 1)
    gp1 = GP(time_points=X.T, values=Y.T, kernel=k1)
    gp1.optimize()
    print(gp1.se_function([0.697774447341, 1.61119536129, 7.64794567566e-09]))
    # print(k1.cal_SE())