def kernel_ridge_pre(X_train, y_train, X_pre, val):
    kernel = KernelRidge(kernel=val['kernel'], alpha=val['alpha'], gamma=val['gamma'])
    kernel.fit(X_train, y_train)
    y_pre = kernel.predict(X_pre)
    return y_pre
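# A minimal usage sketch (hypothetical data) for kernel_ridge_pre above; the
# `val` dict supplies the keys the function reads: 'kernel', 'alpha', 'gamma'.
import numpy as np
from sklearn.kernel_ridge import KernelRidge  # used inside kernel_ridge_pre

rng = np.random.RandomState(0)
X_train = rng.randn(50, 3)
y_train = X_train @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(50)
X_pre = rng.randn(5, 3)
val = {'kernel': 'rbf', 'alpha': 1.0, 'gamma': 0.1}
y_pre = kernel_ridge_pre(X_train, y_train, X_pre, val)
print(y_pre.shape)  # (5,)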
class KernelRidgeImpl():
    def __init__(self, alpha=1, kernel='linear', gamma=None, degree=3, coef0=1, kernel_params=None):
        self._hyperparams = {
            'alpha': alpha,
            'kernel': kernel,
            'gamma': gamma,
            'degree': degree,
            'coef0': coef0,
            'kernel_params': kernel_params}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
def mytrainingaux(X, Y, par):
    #reg = neighbors.KNeighborsRegressor(n_neighbors=par)
    reg = KernelRidge(kernel='rbf', gamma=par[0], alpha=par[1])
    reg.fit(X, Y)
    return reg
def fit_krr(apar, gpar, nevt):
    # retrieve training data
    X = root2array('../../svm/no_truecc_cut_stride2_offset0.root',
                   branches='recotrklenact',
                   selection='mustopz<1275&&isnumucc==1',
                   stop=nevt).reshape(-1, 1)
    y = root2array('../../svm/no_truecc_cut_stride2_offset0.root',
                   branches='trueemu',
                   selection='mustopz<1275&&isnumucc==1',
                   stop=nevt)

    # rescale the regressors and save it
    os.system('mkdir -p models')
    scaler = preprocessing.StandardScaler().fit(X)
    scalerpn = 'models/regressor_scaler_active_a{}g{}nevt{}.pkl'.format(
        apar, gpar, nevt)
    joblib.dump(scaler, scalerpn)

    # fit the model
    krr = KernelRidge(kernel='rbf', alpha=float(apar), gamma=float(gpar))
    Xnorm = scaler.transform(X)
    krr.fit(Xnorm, y)

    # save the model
    modelpn = 'models/muon_energy_estimator_active_a{}g{}nevt{}.pkl'.format(
        apar, gpar, nevt)
    joblib.dump(krr, modelpn)
def test_regressor_modifications(self):
    regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1)
    kpcovr = self.model(mixing=0.5, regressor=regressor, kernel="rbf", gamma=0.1)

    # KPCovR regressor matches the original
    self.assertTrue(regressor.get_params() == kpcovr.regressor.get_params())

    # KPCovR regressor updates its parameters
    # to match the original regressor
    regressor.set_params(gamma=0.2)
    self.assertTrue(regressor.get_params() == kpcovr.regressor.get_params())

    # Fitting regressor outside KPCovR fits the KPCovR regressor
    regressor.fit(self.X, self.Y)
    self.assertTrue(hasattr(kpcovr.regressor, "dual_coef_"))

    # Raise error during KPCovR fit since regressor and KPCovR
    # kernel parameters are now inconsistent
    with self.assertRaises(ValueError) as cm:
        kpcovr.fit(self.X, self.Y)
    # original used the nonexistent `cm.message` inside assertTrue;
    # assertEqual against str(cm.exception) is the intended check
    self.assertEqual(
        str(cm.exception),
        "Kernel parameter mismatch: the regressor has kernel parameters "
        "{kernel: linear, gamma: 0.2, degree: 3, coef0: 1, kernel_params: None}"
        " and KernelPCovR was initialized with kernel parameters "
        "{kernel: linear, gamma: 0.1, degree: 3, coef0: 1, kernel_params: None}",
    )
class KRR_calibration:
    def __init__(self):
        self.model = 'KRR'

    def fit(self, X, p, Y, kernel_function='rbf', **kwargs):
        from sklearn.kernel_ridge import KernelRidge
        check_attributes(X, Y)

        self.model = KernelRidge(kernel=kernel_function, **kwargs)
        observed_bias = Y - p
        self.model.fit(X, observed_bias)
        return self.model

    def predict(self, X, p=None, mode='prob'):
        if mode == 'bias':
            return self.model.predict(X)
        elif mode == 'prob':
            return self.model.predict(X) + p.flatten()
        else:
            raise ValueError("Mode %s is not defined." % mode)
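# Hypothetical usage of KRR_calibration above: p holds a model's raw
# probability predictions for X, Y the observed outcomes; fit() learns the
# local bias Y - p. Assumes check_attributes from the same module is in scope.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(200, 2)
p = rng.rand(200)                         # uncalibrated probabilities
Y = (rng.rand(200) < p).astype(float)     # observed binary outcomes
cal = KRR_calibration()
cal.fit(X, p, Y, kernel_function='rbf', alpha=1.0)
p_cal = cal.predict(X, p=p, mode='prob')  # bias-corrected probabilities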
def KernelRIDGE(X_train, X_dev, y_train, y_dev):
    KERNEL = 'polynomial'
    DEGREE = 2
    # Successive reassignments narrow the sweep; only the last list is used.
    ALPHA = [0.00001, 0.00003, 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03,
             0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]
    ALPHA = [0.03, 0.05, 0.1, 0.15, 0.3]
    ALPHA = [0.05, 0.1, 0.2]
    ALPHA = [0.02]
    for hyper in ALPHA:
        KRR = KernelRidge(alpha=hyper, kernel=KERNEL, degree=DEGREE)
        KRR.fit(X_train, y_train)
        ev.evaluate(KRR, 'KERNEL_RID', 'alpha', hyper,
                    X_train, X_dev, y_train, y_dev)
        print(" ")
    return KRR
def run(self, ind_sampling, ind_fold):
    if self.fold_setting == "S4":
        nb_fold = self.nb_fold * self.nb_fold
    self.load_CV_indexes(ind_sampling)
    if self.CV_type == 'ClusterCV_':
        ajout = self.CV_type
    else:
        ajout = 'CV_'
    K_train, K_test = self.make_Ktrain_and_Ktest_MT_with_settings(
        self.samples_tr[ind_fold], self.samples_te[ind_fold])
    pred_score = []
    for param in range(len(self.list_param)):
        if self.type_clf == "SVM":
            clf = svm.SVC(kernel='precomputed', C=self.list_param[param])
            clf.fit(K_train, self.labels_tr[ind_fold])
            Y_test_score = clf.decision_function(K_test).tolist()
        elif self.type_clf == "KernelRidge":
            clf = KernelRidge(alpha=self.list_param[param], kernel='precomputed')
            # original referenced an undefined `inner_labels_tr`; presumably
            # the same training labels as in the SVM branch
            clf.fit(K_train, self.labels_tr[ind_fold])
            Y_test_score = clf.predict(K_test).tolist()
        else:
            raise ValueError('invalid value of type_clf')
        pred_score.append(Y_test_score)
        del clf
        del Y_test_score
    pickle.dump(pred_score,
                open('saved_results/MT/MT_' + str(self.nb_fold) + 'fold' + ajout +
                     self.fold_setting + "_" + self.type_clf + "_" +
                     str(ind_fold) + "_" + str(ind_sampling) + ".data", 'wb'))
    del K_train
    del K_test
def ridgeReg(X, y):
    X_train, X_test, y_train, y_test = train_test_split(
        np.array(X)[:, 6:], np.array(y), test_size=0.20, random_state=1)
    #print(X_test)
    regr = KernelRidge(alpha=10, kernel="polynomial", gamma=0.5)
    regr.fit(X_train, y_train)
    y_pred = regr.predict(X_test)
    index = 0
    for i in y_pred:
        #print("ypred = " + str(i) + " y test = " + str(y_test[index]))
        index = index + 1
    #print('Coefficients: \n', regr.coef_)
    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # Explained variance score: 1 is perfect prediction
    print('Variance score: %.2f' % r2_score(y_test, y_pred))
    # What were the real predictions?
    y_pred_train = regr.predict(X_train)
    print("Mean squared error on the training set: %.2f"
          % mean_squared_error(y_train, y_pred_train))
    print("Mean squared error on the test set: %.2f"
          % mean_squared_error(y_test, y_pred))
    print("size of X = ", str(len(y)))
def get_reconstruction_error(ct, data, nsplits=4, clf='kridge'):
    tasknames = [i.split('.')[0] for i in data.columns]
    tasks = list(set(tasknames))
    tasks.sort()
    chosen_vars = []
    #print(ct, tasks, tasknames)
    for i in ct:
        vars = [j for j in range(len(tasknames))
                if tasknames[j].split('.')[0] == tasks[i]]
        chosen_vars += vars
    kf = KFold(n_splits=nsplits, shuffle=True)
    fulldata = data.values
    #subdata = data.ix[:, chosen_vars].values
    if clf == 'kridge':
        linreg = KernelRidge(alpha=1)
    elif clf == 'rf':
        linreg = RandomForestRegressor()
    else:
        linreg = LinearRegression()
    scaler = StandardScaler()
    pred = numpy.zeros(fulldata.shape)
    for train, test in kf.split(fulldata):
        # fit scaler to train data and apply to test
        fulldata_train = scaler.fit_transform(fulldata[train, :])
        fulldata_test = scaler.transform(fulldata[test, :])
        subdata_train = fulldata_train[:, chosen_vars]
        subdata_test = fulldata_test[:, chosen_vars]
        linreg.fit(subdata_train, fulldata_train)
        pred[test, :] = linreg.predict(subdata_test)
    cc = numpy.corrcoef(scaler.transform(fulldata).ravel(),
                        pred.ravel())[0, 1]
    return cc
def local_bias_estimator(X, Y, p, X_grid, model='KRR', kernel_function='rbf', **kwargs):
    check_attributes(X, Y)
    if model == 'KRR':
        from sklearn.kernel_ridge import KernelRidge
        model = KernelRidge(kernel=kernel_function, **kwargs)
        # kr = KernelRidge(alpha=alpha, kernel='rbf', **kwargs)
    elif model == 'SVR':
        from sklearn.svm import SVR
        model = SVR(kernel=kernel_function, **kwargs)
    elif model == 'EWF':
        K = pairwise_kernels(X, X_grid, metric=kernel_function, **kwargs)
        p_err = Y - p
        bias = np.sum(p_err.flatten() * K.T, axis=1) / np.sum(K.T, axis=1)
        return bias
    else:
        raise ValueError("Model %s is not defined." % model)
    bias_calibration = Y - p
    model.fit(X, bias_calibration)
    bias = model.predict(X_grid)
    return bias
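# A small illustration of the kernel-smoothed 'EWF' branch of
# local_bias_estimator above (synthetic data; check_attributes is assumed to
# be available from the surrounding module).
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(200, 1)
Y = np.sin(2 * np.pi * X[:, 0]) + 0.1 * rng.randn(200)
p = np.sin(2 * np.pi * X[:, 0])                # predictions of some base model
X_grid = np.linspace(0, 1, 50).reshape(-1, 1)
bias = local_bias_estimator(X, Y, p, X_grid, model='EWF',
                            kernel_function='rbf', gamma=10.0)
print(bias.shape)  # (50,)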
def mytraining(X, Y):
    #reg = svm.SVR(kernel='rbf', C=1000, gamma=0.1)
    reg = KernelRidge(alpha=0.001, coef0=1, degree=3, gamma=0.1, kernel='rbf')
    reg.fit(X, Y.ravel())
    return reg
def choose_krr_gamma(train_x, test_x, train_y, test_y):
    gammas = [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    gamma_scores = []
    best_g_score = 0.0
    best_g = ""
    for g in gammas:
        krr = KernelRidge(kernel="laplacian", gamma=g)
        krr.fit(train_x, train_y)
        krr.predict(test_x)
        score = krr.score(test_x, test_y)
        if score > best_g_score:
            best_g_score = score
            best_g = g
        gamma_scores.append(score)
    print(gamma_scores)
    print("Best gamma: " + str(best_g))
    print("Score received: " + str(best_g_score))
    plt.plot(gammas, gamma_scores)
    plt.xlabel('Gamma')
    plt.ylabel('Score')
    plt.title('Tuning Gamma Hyperparameter for KRR')
    plt.show()
def choose_krr_alpha(train_x, test_x, train_y, test_y):
    alphas = [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.0]
    alpha_scores = []
    best_a_score = 0.0
    best_a = ""
    for a in alphas:
        krr = KernelRidge(kernel="laplacian", alpha=a)
        krr.fit(train_x, train_y)
        krr.predict(test_x)
        score = krr.score(test_x, test_y)
        if score > best_a_score:
            best_a_score = score
            best_a = a
        alpha_scores.append(score)
    print(alpha_scores)
    print("Best alpha: " + str(best_a))
    print("Score received: " + str(best_a_score))
    plt.plot(alphas, alpha_scores)
    plt.xlabel('Alpha')
    plt.ylabel('Score')
    plt.title('Tuning Alpha Hyperparameter for KRR')
    plt.show()
def choose_krr_kernel(train_x, test_x, train_y, test_y):
    kernels = ['linear', 'rbf', 'laplacian', 'polynomial', 'sigmoid']
    kernel_scores = []
    best_k_score = 0.0
    best_k = ""
    for k in kernels:
        krr = KernelRidge(kernel=k)
        krr.fit(train_x, train_y)
        krr.predict(test_x)
        score = krr.score(test_x, test_y)
        if score > best_k_score:
            best_k_score = score
            best_k = k
        kernel_scores.append(score)
    print(kernel_scores)
    print("Best kernel: " + str(best_k))
    print("Score received: " + str(best_k_score))
    plt.bar(kernels, kernel_scores)
    plt.xlabel('Kernel')
    plt.ylabel('Score')
    plt.xticks(np.arange(len(kernels)), kernels)
    plt.title('Tuning Kernel Hyperparameter for KRR')
    plt.show()
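# The three sweeps above tune one hyperparameter at a time; a joint,
# cross-validated search is usually preferable. A minimal sketch, with
# synthetic data standing in for train_x/train_y:
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
train_x = rng.randn(80, 4)
train_y = train_x @ rng.randn(4) + 0.1 * rng.randn(80)
param_grid = {
    'kernel': ['rbf', 'laplacian'],
    'alpha': [0.01, 0.1, 1.0],
    'gamma': [0.01, 0.1, 1.0],
}
search = GridSearchCV(KernelRidge(), param_grid, cv=5)
search.fit(train_x, train_y)
print(search.best_params_)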
def test_incompatible_coef_shape(self):
    # self.Y is 2D with two targets
    # Don't need to test X shape, since this should
    # be caught by sklearn's _validate_data
    regressor = KernelRidge(alpha=1e-8, kernel="linear")
    regressor.fit(self.X, self.Y[:, 0][:, np.newaxis])
    kpcovr = self.model(mixing=0.5, regressor=regressor)

    # Dimension mismatch
    with self.assertRaises(ValueError) as cm:
        kpcovr.fit(self.X, self.Y[:, 0])
    # original used the nonexistent `cm.message` inside assertTrue;
    # assertEqual against str(cm.exception) is the intended check
    self.assertEqual(
        str(cm.exception),
        "The regressor coefficients have a dimension incompatible "
        "with the supplied target space. "
        "The coefficients have dimension %d and the targets "
        "have dimension %d" % (regressor.dual_coef_.ndim, self.Y[:, 0].ndim),
    )

    # Shape mismatch (number of targets)
    with self.assertRaises(ValueError) as cm:
        kpcovr.fit(self.X, self.Y)
    self.assertEqual(
        str(cm.exception),
        "The regressor coefficients have a shape incompatible "
        "with the supplied target space. "
        "The coefficients have shape %r and the targets "
        "have shape %r" % (regressor.dual_coef_.shape, self.Y.shape),
    )
def get_SVM_NTK(self, for_test: bool):
    if self.params['kernel_ridge']:
        clf = KernelRidge(alpha=self.params['ridge_coef'][0], kernel="precomputed")
    else:
        clf = SVR(kernel="precomputed", C=self.params['svm_coef'][0],
                  epsilon=self.params['svm_coef'][1], cache_size=100000)
    output = []
    train = not for_test
    Ys_ = self.test_Ys_ if for_test else self.Ys_
    N = self.N_test if for_test else self.N_train
    for idx in range(N):
        NTK_train = self.get_ntk(fst_train=train, fst_idx=idx, fst_qry=False,
                                 snd_train=train, snd_idx=idx, snd_qry=False,
                                 ridge=True)
        NTK_test = self.get_ntk(fst_train=train, fst_idx=idx, fst_qry=True,
                                snd_train=train, snd_idx=idx, snd_qry=False,
                                ridge=False)
        y = Ys_[idx]
        time_evolution = self.time_evolution(NTK_train, self.params['inner_lr'])
        clf.fit(X=NTK_train, y=time_evolution @ y)
        pred = clf.predict(X=NTK_test)
        output.append(pred)
    return np.concatenate(output)
def train_model(input_X_h5_loc, labels_y_h5_loc, model_loc, alpha, kernel,
                gamma, degree, coef0, save_model):
    """
    Trains a kernel ridge regression model.

    See the scikit-learn documentation:
    http://scikit-learn.org/stable/modules/generated/sklearn.kernel_ridge.KernelRidge.html#sklearn.kernel_ridge.KernelRidge
    """
    total_time = time.time()

    # Loading inputs and targets
    input_X = np.array(h5py.File(input_X_h5_loc)[inputs_key])
    labels_y = np.array(h5py.File(labels_y_h5_loc)[targets_key]).reshape((-1,))

    # Creating model
    model = KernelRidge(degree=degree, coef0=coef0, kernel=kernel,
                        gamma=gamma, alpha=alpha)

    # Model training
    model.fit(input_X, labels_y)

    # Saving the model if specified
    if save_model:
        os.makedirs(model_loc[:model_loc.rindex(os.path.sep)], exist_ok=True)
        joblib.dump(model, model_loc)

    print("--- %s seconds ---" % (time.time() - total_time))
def train_krrl_linear(self, data):
    train, validacion = data
    x_tr, y_tr = train
    x_val, y_val = validacion
    #print("The train set has {} rows and {} columns".format(x_tr.shape[0], x_tr.shape[1]))
    #print("The validation set has {} rows and {} columns".format(x_val.shape[0], x_val.shape[1]))
    print('Start training KernelRidge with linear kernel...')
    start_time = self.timer()
    krrl = KernelRidge(alpha=1)
    krrl.fit(x_tr, y_tr)
    print("The R2 is: {}".format(krrl.score(x_tr, y_tr)))
    # print("The alpha chosen by CV is: {}".format(krrl.alpha_))
    self.timer(start_time)

    print("Making prediction on validation data")
    y_val = np.expm1(y_val)
    y_val_pred = np.expm1(krrl.predict(x_val))
    mae = mean_absolute_error(y_val, y_val_pred)
    print("The mean absolute error is {}".format(mae))

    print('Saving model into a pickle')
    try:
        os.mkdir('pickles')
    except:
        pass
    with open('pickles/krrlLinearK.pkl', 'wb') as f:
        pickle.dump(krrl, f)

    print('Making prediction and saving into a csv')
    y_test = krrl.predict(self.x_test)
    return y_test
def RunKernel(XTrain, YTrain, XVal, YVal, XTest, YTest):
    print("Optimizing Kernel Ridge Regression Parameters")
    #BestAlpha, BestGamma = DoGridSearch(XTrain, YTrain.ravel())
    BestAlpha = 0.01
    BestGamma = 0.001
    KRR = KernelRidge(kernel='laplacian', gamma=BestGamma, alpha=BestAlpha)
    KRR.fit(XTrain, YTrain.ravel())
    YPredTrain = KRR.predict(XTrain)
    DiffYTrain = abs(YPredTrain - YTrain.ravel())
    print(sum(DiffYTrain) / float(len(DiffYTrain)))
    YPred = KRR.predict(XTest)
    DiffY = abs(YPred - YTest.ravel())
    MAEPredicted = sum(DiffY) / float(len(DiffY))
    print(BestAlpha, BestGamma)
    print(MAEPredicted)
    plt.scatter(YTest.tolist(), YPred.tolist(), c='red', s=5)
    plt.plot(np.linspace(0, 0.5, 2), np.linspace(0, 0.5, 2))
    plt.ylabel('Predicted Excitation Energy (a.u.)')
    plt.xlabel('True Excitation Energy (a.u.)')
    plt.title('Kernel Ridge Regression (Laplacian) Learned Excitation Energies')
    plt.show()

#RunKernel()
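# DoGridSearch is commented out in RunKernel above; a plausible stand-in under
# the same laplacian-kernel assumption (the exact original is not shown):
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV

def DoGridSearch(XTrain, YTrain):
    param_grid = {'alpha': [1e-3, 1e-2, 1e-1, 1.0],
                  'gamma': [1e-4, 1e-3, 1e-2, 1e-1]}
    search = GridSearchCV(KernelRidge(kernel='laplacian'), param_grid, cv=5,
                          scoring='neg_mean_absolute_error')
    search.fit(XTrain, YTrain)
    return search.best_params_['alpha'], search.best_params_['gamma']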
def train_kernel_ridge_regression_clf(self, train_daylist, distinct, gamma=1, alpha=1):
    daytest = self.select_test_day(train_daylist)
    y_train = []
    X_train = []
    for day in daytest:
        for slice in range(144):
            dateslice = day + '-' + str(slice + 1)
            #feature, gap = self.generateFeatureLabel(dateslice, distinct)
            feature, gap = self.feature.generate(dateslice, distinct)
            if feature is not None:
                if gap != 0:
                    gap = math.log10(float(gap))
                else:
                    gap = -0.1
                X_train.append(feature)
                y_train.append(gap)
    clf = KernelRidge(kernel='polynomial', gamma=gamma, alpha=alpha)
    #clf = KernelRidge(kernel='polynomial', degree=3, alpha=0.01)
    clf.fit(X_train, y_train)
    return clf
def prin(X, y, file, dic):
    t = 100
    #clf = MLPRegressor(solver=dic['solver'], activation=dic['activation'],
    #                   hidden_layer_sizes=eval(dic['hls']),
    #                   batch_size=dic['batch_size'], max_iter=dic['max_iter'])
    #clf = LinearRegression()
    clf = KernelRidge(alpha=0.001, kernel='laplacian', degree=18)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=float(dic['test_size']))
    clf.fit(X_train, y_train)
    print('Training size', len(X_train))
    print('Testing size', len(X_test))
    #scores = cross_val_score(clf, X, y, cv=5)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    accuracy = clf.score(X_train, y_train)
    print('accuracy', accuracy, '\n')
    print('RMSE', math.sqrt(metrics.mean_squared_error(y_test, clf.predict(X_test))))
    MAE = metrics.mean_absolute_error(y_test, clf.predict(X_test))
    print('MAE', MAE)
    #X_test, y_test = X[-t:], y[-t:]
    #file = file[-t:]
    pr = clf.predict(X_test)
    print('Filename Percentage Error Actual Value Predicted Value Difference\n')
    for i in range(len(y_test)):
        if y_test[i] == 0.0:
            y_test[i] = 0.0000001
        predi = str(round(((pr[i] - y_test[i]) / y_test[i]) * 100, 2)) + ' %'
        print(file[i] + ' ' * (20 - len(file[i])),
              ' ' * (20 - len(predi)) + predi,
              ' ' * (20 - len(str(y_test[i]))) + str(y_test[i]),
              ' ' * (20 - len(str(round(pr[i], 2)))) + str(round(pr[i], 2)),
              ' ' * (20 - len(str(round((y_test[i] - pr[i]), 4)))) + str(round((y_test[i] - pr[i]), 4)))
    #print('Mean square Error', mean_squared_error(X, pr))
    #print('R2 score', r2_score(X, pr))
    #test(X, y, file, clf.coef_[0], clf.intercept_[0])
    #plot_g(clf)
    return MAE
def KRR_CV(self, trainX, testX, trainY, testY):
    kernel_vals = ['rbf', 'laplacian']
    kernel_indices = [0, 1]
    inverse_gamma_vals = [1.0, 10.0, 20.0, 40.0, 80.0]
    alpha_vals = [0.0001, 0.001, 0.01, 0.1, 1.0]
    cv_errors = np.empty([len(kernel_vals) * len(inverse_gamma_vals) * len(alpha_vals), 4])
    i = 0
    for kern in kernel_vals:
        for g in inverse_gamma_vals:
            for a in alpha_vals:
                errors = np.empty([self.cv_split_no, 1])
                kf = KFold(n_splits=self.cv_split_no, random_state=30, shuffle=True)
                j = 0
                for train_indices, validation_indices in kf.split(trainX):
                    training_set_X, validation_set_X = trainX[train_indices], trainX[validation_indices]
                    training_set_Y, validation_set_Y = trainY[train_indices], trainY[validation_indices]
                    regr = KernelRidge(alpha=a, gamma=1.0 / g, kernel=kern)
                    regr.fit(training_set_X, training_set_Y)
                    predY = regr.predict(validation_set_X)
                    errorY = np.absolute(predY - validation_set_Y)
                    errors[j] = np.mean(errorY)
                    j = j + 1
                cv_errors[i, :] = (kernel_indices[kernel_vals.index(kern)],
                                   g, a, np.mean(errors))
                i = i + 1
    k_opt, g_opt, a_opt, _ = cv_errors[np.argmin(cv_errors[:, 3]), :]
    k_opt = kernel_vals[kernel_indices.index(k_opt)]
    regr = KernelRidge(alpha=a_opt, gamma=1.0 / g_opt, kernel=k_opt)
    regr.fit(trainX, trainY)
    predY = regr.predict(testX)
    err_on_opt_params = np.absolute(predY - testY)
    return err_on_opt_params
def train_select_regressor(X, y, param_grid, label, scalers_dict):
    # Select label
    y_selected = y[label].to_numpy()
    # Standardize y
    y_selected_std = scalers_dict[label].transform(y_selected.reshape(-1, 1))
    # Initialize regressor
    if grid_search:
        # Instantiate model
        kern_regr = KernelRidge(kernel="rbf")
        # Initialize Grid Search
        reg = GridSearchCV(kern_regr, param_grid, verbose=3, n_jobs=2, scoring='r2')
        # Refit
        reg.fit(X, y_selected_std)
        # Return regressor wrapper
        return MedRegressorWrapper(label, reg.best_estimator_,
                                   reg.best_params_, reg.best_score_)
    else:
        # Instantiate model
        kern_regr = KernelRidge(kernel="rbf", alpha=1, gamma=0.01)
        # Fit
        kern_regr.fit(X, y_selected_std)
        # Return regressor
        return MedRegressorWrapper(label, kern_regr, None, -1)
def generate(self):
    neuroticismModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # neuroticismModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # neuroticismModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # neuroticismModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    #
    # extraversionModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # extraversionModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # extraversionModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # extraversionModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    #
    # conscientiousnessModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # conscientiousnessModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # conscientiousnessModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # conscientiousnessModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    #
    # agreeablenessModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # agreeablenessModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # agreeablenessModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # agreeablenessModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    #
    # opennessModelRightEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # opennessModelLeftEye = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # opennessModelFace = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)
    # opennessModelSmile = KernelRidge(kernel='rbf', alpha=0.1, gamma=0.1)

    neuroticismModelRightEye.fit(self.featuresDict['righteye'],
                                 self.labelsDict['righteye']['neuroticism'])
def ridge_regression(K1, K2, y1, y2, alpha, c):
    n_val, n_train = K2.shape
    clf = KernelRidge(kernel="precomputed", alpha=alpha)
    one_hot_label = np.eye(c)[y1] - 1.0 / c
    clf.fit(K1, one_hot_label)
    z = clf.predict(K2).argmax(axis=1)
    return 1.0 * np.sum(z == y2) / n_val
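# Usage sketch for ridge_regression above with precomputed linear kernels;
# all names here (X1, X2, y1, y2) are illustrative stand-ins:
import numpy as np
from sklearn.metrics.pairwise import linear_kernel

rng = np.random.RandomState(0)
X1, y1 = rng.randn(40, 5), rng.randint(0, 3, 40)   # training split
X2, y2 = rng.randn(20, 5), rng.randint(0, 3, 20)   # validation split
K1 = linear_kernel(X1, X1)   # (n_train, n_train)
K2 = linear_kernel(X2, X1)   # (n_val, n_train)
print(ridge_regression(K1, K2, y1, y2, alpha=1.0, c=3))  # validation accuracy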
def reg_krr(X, y, kwargs_set):
    model = KernelRidge(alpha=kwargs_set['alfa'])
    if len(X.shape) == 1:
        model.fit(X.values.reshape(-1, 1), y)
    else:
        model.fit(X, y)
    return model
def choose_alpha_ridge(X, y, range_C, gammaX, plot_color):
    '''Implement 5-fold CV to determine the optimal C (alpha = 1/(2C))'''
    # Param setup
    kf = KFold(n_splits=5)
    mean_error = []
    std_error = []
    for C in range_C:
        # Params
        mse_temp = []
        # Model
        model = KernelRidge(alpha=1.0 / (2 * C), kernel='rbf', gamma=gammaX)
        # 5-fold CV
        for train, test in kf.split(X):
            # Model
            model.fit(X[train], y[train])
            ypred = model.predict(X[test])
            mse = mean_squared_error(y[test], ypred)
            mse_temp.append(mse)
        # Get mean & variance
        mean_error.append(np.array(mse_temp).mean())
        std_error.append(np.array(mse_temp).std())
    # Plot
    fig = plt.figure(figsize=(15, 12))
    plt.errorbar(range_C, mean_error, yerr=std_error, color=plot_color)
    plt.xlabel('C')
    plt.ylabel('Mean square error')
    plt.title('Choice of C in kernelised Ridge Regression - 5 fold CV, gamma = {}'.format(gammaX))
    plt.show()
def fit(self, features, targets, cv=5, alpha=1e-8,
        scoring_criteria='neg_mean_absolute_error', threshold=1e-3):
    """
    Fit the dataset with kernel ridge regression.

    Args:
        features (np.array): features X.
        targets (np.array): targets y.
        cv (int): The number of folds in cross validation. Default to 5.
        alpha (float): Small positive number. Regularization parameter in KRR.
        scoring_criteria (str): The scoring strategy to evaluate the
            prediction on test sets. The same as the scoring parameter in
            sklearn.model_selection.GridSearchCV.
            Default to 'neg_mean_absolute_error', i.e. MAE.
        threshold (float): The convergence threshold of the final optimal sigma.

    Returns:
        (float) The optimized sigma.
    """
    st_gamma = -np.inf
    nd_gamma = np.inf
    gamma_trials = np.logspace(-6, 4, 11)
    while abs(st_gamma - nd_gamma) > threshold:
        kr = GridSearchCV(KernelRidge(kernel='rbf', alpha=alpha, gamma=0.1),
                          cv=cv, param_grid={"gamma": gamma_trials},
                          return_train_score=True)
        kr.fit(features, targets)
        cv_results = pd.DataFrame(kr.cv_results_)
        st_gamma = cv_results['param_gamma'][cv_results['rank_test_score'] == 1].iloc[0]
        nd_gamma = cv_results['param_gamma'][cv_results['rank_test_score'] == 2].iloc[0]
        gamma_trials = np.linspace(min(st_gamma, nd_gamma),
                                   max(st_gamma, nd_gamma), 10)
    gamma = st_gamma

    K = np.exp(-gamma * squareform(pdist(features)) ** 2)
    alphas = np.dot(np.linalg.inv(K + alpha * np.eye(len(features))), targets)
    kkr = KernelRidge(alpha=alpha, gamma=gamma, kernel='rbf')
    kkr.fit(features, targets)
    self.param['n_train'] = len(features)
    self.param['lambda'] = alpha
    self.param['sigma'] = 1 / np.sqrt(2 * gamma)
    self.xU = features
    self.yU = targets
    self.predictor = kkr
    self.alphas = alphas
    return gamma
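# The hand-computed `alphas` in fit() above are the KRR dual coefficients,
# alpha = (K + lambda*I)^{-1} y; a quick check (synthetic data, same rbf
# kernel) that they match sklearn's dual_coef_:
import numpy as np
from scipy.spatial.distance import pdist, squareform
from sklearn.kernel_ridge import KernelRidge

rng = np.random.RandomState(0)
X, y = rng.randn(30, 4), rng.randn(30)
gamma, lam = 0.1, 1e-8
K = np.exp(-gamma * squareform(pdist(X)) ** 2)
alphas = np.linalg.solve(K + lam * np.eye(len(X)), y)
model = KernelRidge(alpha=lam, gamma=gamma, kernel='rbf').fit(X, y)
print(np.allclose(alphas, model.dual_coef_))  # True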
def AlgoKRR(df_train, df_trainY):
    # (a stray '#' in the original commented out this assignment,
    # leaving `model` undefined)
    model = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
    rmsle_cv(model, df_train, df_trainY)
    model.fit(df_train, df_trainY)
    result = model.predict(df_train)
    print("rms value of same set: ",
          np.around(sqrt(mean_squared_error(df_trainY, result)), decimals=7))
    return model
def lgo_sklearn(X, y, groups, regparam):
    logo = LeaveOneGroupOut()
    errors = []
    for train, test in logo.split(X, y, groups=groups):
        rls = KernelRidge(kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        e = sqerror(y[test], p)
        errors.append(e)
    return np.mean(errors)
def lpo_sklearn(X, y, regparam):
    lpo = LeavePOut(p=2)
    preda = []
    predb = []
    for train, test in lpo.split(X):
        rls = KernelRidge(kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        preda.append(p[0])
        predb.append(p[1])
    return preda, predb
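# Quick smoke test (synthetic data) for the leave-one-group-out helper above;
# sqerror is assumed to be a squared-error helper from the surrounding module.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(30, 3)
y = rng.randn(30)
groups = np.repeat(np.arange(6), 5)   # six groups of five samples
print(lgo_sklearn(X, y, groups, regparam=1.0))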
class VADEstimator(BaseEstimator):
    def fit(self, x, y, size=1):
        # A Keras MLP is built here but immediately discarded: self.model is
        # overwritten with a KernelRidge below, so only the KRR is ever fit.
        self.model = Sequential()
        self.model.add(Dense(int(embeddings_dim / 2.0), input_dim=embeddings_dim,
                             init='uniform', activation='tanh'))
        self.model.add(Dense(int(embeddings_dim / 4.0), init='uniform', activation='tanh'))
        self.model.add(Dense(size, init='uniform'))
        self.model.compile(loss='mse', optimizer='rmsprop')
        self.model = KernelRidge(kernel='rbf')
        self.model.fit(x, y)

    def predict(self, x):
        if isinstance(self.model, Sequential):
            return self.model.predict(x, verbose=0)[0]
        return self.model.predict(x)
def ANM_causation_score(self, train_size=0.5, independence_criterion='HSIC',
                        metric='linear', regression_method='GP'):
    '''
    Measure how likely a given causal direction is true.

    Parameters
    ----------
    train_size : Fraction of the given data used in the training phase
    independence_criterion :
        'kruskal' for the Kruskal-Wallis H-test,
        'HSIC' for the Hilbert-Schmidt Independence Criterion
    metric : linear, sigmoid, rbf, poly
        Kernel function used to compute the Gram matrix for HSIC.
        A Gaussian kernel is used in: "Nonlinear causal discovery with
        additive noise models", Patrik O. Hoyer et al.

    Returns
    -------
    causal_strength : A float between 0. and 1.
    '''
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(self.X, self.Y,
                                                    train_size=train_size)

    if regression_method == 'GP':
        _gp = pyGPs.GPR()                  # specify model (GP regression)
        _gp.getPosterior(Xtrain, Ytrain)   # fit default model (mean zero & rbf kernel) with data
        _gp.optimize(Xtrain, Ytrain)       # optimize hyperparameters (default optimizer: single run minimize)
        # Forward case
        #_gp = KernelRidge(kernel='sigmoid', degree=3)
        #_gp.fit(Xtrain, Ytrain)
        ym, ys2, fm, fs2, lp = _gp.predict(Xtest)
        #_gp.plot()
        #errors_forward = _gp.predict(Xtest) - Ytest
        errors_forward = ym - Ytest
    else:
        _gp = KernelRidge(kernel='sigmoid')
        _gp.fit(Xtrain, Ytrain)
        errors_forward = _gp.predict(Xtest) - Ytest

    # Independence score
    forward_indep_pval = {
        'kruskal': kruskal(errors_forward, Xtest)[1],
        'HSIC': self.HilbertSchmidtNormIC(errors_forward, Xtest, metric=metric)[1]
    }[independence_criterion]

    return {'causal_strength': forward_indep_pval}
def plot_kernel_ridge(X, y, gamma=0.5, alpha=0.1):
    # kernel (ridge) regression
    krr = KernelRidge(kernel="rbf", gamma=gamma, alpha=alpha)
    krr.fit(X, y)

    # predict
    x_plot = np.linspace(min(X), max(X), 100)[:, np.newaxis]
    y_plot = krr.predict(x_plot)

    # plot
    plt.figure(figsize=(8, 4.8))
    plt.plot(X, y, 'or')
    plt.plot(x_plot, y_plot)
    # plt.title(r"Gaussian Kernel ($\gamma=%0.2f, \alpha=%0.2f$)" % (gamma, alpha), fontsize=16)
    plt.title(r"Gaussian Kernel ($\gamma=%0.2f$)" % (gamma), fontsize=16)
def train_kernelRidgeModel(X, y, alpha=1, kernel="linear", gamma=None,
                           degree=3, coef0=1, kernel_params=None):
    """
    Train a kernel ridge regression model
    """
    model = KernelRidge(
        alpha=alpha,
        kernel=kernel,
        gamma=gamma,
        degree=degree,
        coef0=coef0,
        kernel_params=kernel_params
    )
    model = model.fit(X, y)
    return model
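# Minimal call of train_kernelRidgeModel above on synthetic data:
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(60, 3)
y = X @ np.array([0.5, -1.0, 2.0]) + 0.05 * rng.randn(60)
model = train_kernelRidgeModel(X, y, alpha=0.1, kernel='rbf', gamma=0.5)
print(model.score(X, y))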
def modelfitOne(train_X, train_y, test_X, yd, ImageId, FeatureName):
    n_clf = 1
    # estimator
    clf = KernelRidge(kernel='rbf', gamma=6e-4, alpha=2e-2)
    # training
    print('----------------- training... ------------------')
    clf.fit(train_X, train_y)
    # prediction
    print('----------------- predicting... ------------------')
    pred = clf.predict(test_X)
    predicted = np.zeros(len(FeatureName))
    for i in range(len(FeatureName)):
        if i % 500 == 0:
            print('i =', i)
        else:
            pass
        imageID = ImageId[i]
        clfID = yd[FeatureName[i]]
        predicted[i] = pred[imageID, clfID]
    predicted = predicted * 48. + 48.
    return predicted
num_folds = 5  # data is divided into 5 time slices
Overall_Y_Pred = np.zeros(len(X))
rmse_list = []  # not initialized in the original snippet
for i in [t + 1 for t in list(range(4))]:
    to_exclude = list(range(i))
    folder_train = np.asarray(to_exclude).astype(int)
    # index_train starts with the first folder
    index_train = index[folder_train]
    index_test = [element for i, element in enumerate(index) if i not in to_exclude]
    print(len(index_test))
    # train set starts with the first folder
    X_train = X[np.hstack(index_train)]
    Y_train = Y[np.hstack(index_train)]
    X_test = X[np.hstack(index_test)]
    Y_test = Y[np.hstack(index_test)]
    # train on training sets
    model.fit(X_train, Y_train)
    Y_test_Pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(Y_test, Y_test_Pred))
    rmse_list.append(rmse)
    print(rmse_list)

# Plot:
y = np.asarray(rmse_list)
x = np.asarray([t + 1 for t in list(range(4))])
plt.plot(x, y, x, y, 'rs')
plt.title('Number of Folders in Training Set vs. rmse of Test Set')
plt.xlabel('Number of Folders in Training Set')
plt.ylabel('Overall RMSE of Test Set')
plt.grid(True)
plt.show()
def main():
    T = 10.0           # Simulation temperature
    dt = 1 * units.fs  # MD timestep
    nsteps = 500       # MD number of steps
    mixing = [1, -1, 0]  # [1.0, -1.0, 0.3] # mixing weights for "real" and ML forces
    lengthscale = 0.6    # KRR Gaussian width.
    gamma = 1 / (2 * lengthscale**2)
    grid_spacing = 0.05

    # mlmodel = GaussianProcess(corr='squared_exponential',
    #                           # theta0=1e-1, thetaL=1e-4, thetaU=1e+2,
    #                           theta0=1.,
    #                           random_start=100, normalize=False, nugget=1.0e-2)
    # NOTE: gammaL, gammaU, variable_noise and max_lhood are not arguments of
    # sklearn's KernelRidge; this relies on a custom extension of the class.
    mlmodel = KernelRidge(kernel='rbf', gamma=gamma, gammaL=gamma / 4,
                          gammaU=2 * gamma, alpha=5.0e-2,
                          variable_noise=False, max_lhood=True)
    anglerange = sp.arange(0, 2 * sp.pi + grid_spacing, grid_spacing)
    X_grid = sp.array([[sp.array([x, y]) for x in anglerange]
                       for y in anglerange]).reshape((len(anglerange)**2, 2))
    ext_field = IgnoranceField(X_grid, y_threshold=1.0e-1, cutoff=3.)

    # Bootstrap from initial database? uncomment
    data = sp.loadtxt('phi_psi_minener_coarse_1M_md.csv')
    data[:, :2] -= 0.025  # fix because of old round_vector routine
    mlmodel.fit(data[:, :2], data[:, 2])
    ext_field.update_cost(mlmodel.X_fit_, mlmodel.y)

    # Prepare diagnostic visual effects.
    plt.close('all')
    plt.ion()
    fig, ax = plt.subplots(1, 2, figsize=(24, 13))

    atoms = ase.io.read('myplum.xyz')
    with open('data.input', 'r') as file:
        lammpsdata = file.readlines()

    # Set temperature
    MaxwellBoltzmannDistribution(atoms, 0.5 * units.kB * T, force_temp=True)
    # Set total momentum to zero
    p = atoms.get_momenta()
    p -= p.sum(axis=0) / len(atoms)
    atoms.set_momenta(p)
    atoms.rescale_velocities(T)

    # Select MD propagator
    mdpropagator = Langevin(atoms, dt, T * units.kB, 1.0e-2, fixcm=True)
    # mdpropagator = MLVerlet(atoms, dt, T)

    # Zero-timestep evaluation and data files setup.
    print("START")
    pot_energy, f = calc_lammps(atoms, preloaded_data=lammpsdata)
    mlmodel.accumulate_data(round_vector(atoms.colvars(), precision=grid_spacing),
                            pot_energy)
    printenergy(atoms, pot_energy)
    try:
        os.remove('atomstraj.xyz')
    except:
        pass
    traj = open("atomstraj.xyz", 'a')
    atoms.write(traj, format='extxyz')
    results, traj_buffer = [], []

    # When in the simulation to update the ML fit -- optional.
    teaching_points = sp.unique(
        (sp.linspace(0, nsteps**(1 / 3), nsteps / 20)**3).astype('int') + 1)

    # MD Loop
    for istep in range(nsteps):
        print("Dihedral angles | phi = %.3f, psi = %.3f " % (atoms.phi(), atoms.psi()))
        do_update = False  # (istep % 10 == 9) # (istep in teaching_points) or (istep - nsteps == 1) # istep % 20 == 0
        # mdpropagator.halfstep_1of2(f)
        f, pot_energy, _ = get_all_forces(atoms, mlmodel, grid_spacing,
                                          extfield=None, mixing=mixing,
                                          lammpsdata=lammpsdata, do_update=do_update)
        mdpropagator.halfstep_2of2(f)

        # manual cooldown!!!
        if sp.absolute(atoms.get_kinetic_energy() /
                       (1.5 * units.kB * atoms.get_number_of_atoms()) - T) > 50:
            atoms.rescale_velocities(T)

        printenergy(atoms, pot_energy / atoms.get_number_of_atoms(), step=istep)
        if do_update:
            try:
                print("Lengthscale = %.3e, Noise = %.3e"
                      % (1 / (2 * mlmodel.gamma)**0.5, mlmodel.noise.mean()))
            except:
                print("")
        if 'datasetplot' not in locals():
            datasetplot = pl.Plot_datapts(ax[0], mlmodel)
        else:
            datasetplot.update()
        if hasattr(mlmodel, 'dual_coef_'):
            if 'my2dplot' not in locals():
                my2dplot = pl.Plot_energy_n_point(ax[1], mlmodel, atoms.colvars().ravel())
            else:
                my2dplot.update_prediction()
                my2dplot.update_current_point(atoms.colvars().ravel())
        fig.canvas.draw()
        # fig.canvas.print_figure('current.png')
        traj_buffer.append(atoms.copy())
        if istep % 1 == 0:
            for at in traj_buffer:
                atoms.write(traj, format='extxyz')
            traj_buffer = []
        results.append(sp.array([atoms.phi(), atoms.psi(), pot_energy]))

    traj.close()
    print("FINISHED")
    sp.savetxt('results.csv', sp.array(results))
    sp.savetxt('mlmodel.dual_coef_.csv', mlmodel.dual_coef_)
    sp.savetxt('mlmodel.X_fit_.csv', mlmodel.X_fit_)
    sp.savetxt('mlmodel.y.csv', mlmodel.y)
    calc = None
    return mlmodel
regr = linear_model.LinearRegression()
scores = cross_val_score(regr, data.df[inputVariables].values, data.df['count'].values)
print("Linear Regression cross validation score: ", scores.mean())
regr.fit(X_train_sum, y_train_sum)
print("Linear Regression training score: ", regr.score(X_train_sum, y_train_sum))
print("Linear Regression testing score: ", regr.score(X_test_sum, y_test_sum))

##### Kernel Ridge and Support Vector Regression #####

## Finding the best parameters
alpha = [1, 1e-1, 1e-2, 1e-3]
for a in alpha:
    kr = KernelRidge(kernel='rbf', alpha=a)
    kr.fit(X_train_sum, y_train_sum)
    print("Kernel Ridge train score: ", kr.score(X_train_sum, y_train_sum), " for alpha = %s" % a)
    print("Kernel Ridge test score: ", kr.score(X_test_sum, y_test_sum), " for alpha = %s" % a)

### Using GridSearchCV
param_grid = {
    'alpha': [1, 1e-1, 1e-2],  # comma was missing in the original
    "gamma": np.logspace(-2, 2, 5)
}
GSKernelRidge = GridSearchCV(KernelRidge(kernel='rbf'), param_grid=param_grid)
GSKernelRidge.fit(X_train_sum, y_train_sum)
affective[row["Word"].lower()] = np.array([float(row["V.Mean.Sum"]),
                                           float(row["A.Mean.Sum"]),
                                           float(row["D.Mean.Sum"])])

# Expand dictionary of affective words
embeddings_dim = 300
max_words = 100000
embeddings = dict()
embeddings = Word2Vec.load_word2vec_format("GoogleNews-vectors-negative300.bin.gz", binary=True)
train_matrix = []
train_labels = []
for word, scores in affective.items():
    try:
        train_matrix.append(embeddings[word])
        train_labels.append(scores)
    except:
        continue
model = KernelRidge(kernel='poly', degree=4)
model.fit(train_matrix, train_labels)
textdata = " ".join(open(sys.argv[1] + ".revised.txt", 'r').readlines())
tokenizer = Tokenizer(nb_words=max_words, filters=keras.preprocessing.text.base_filter(),
                      lower=True, split=" ")
tokenizer.fit_on_texts(textdata)
for word, index in tokenizer.word_index.items():
    try:
        if not affective.has_key(word):
            # original read `embedding[word]`, an undefined name;
            # `embeddings` is presumably what was meant
            affective[word] = np.array(model.predict(np.array(embeddings[word]).reshape(1, -1))[0])
    except:
        affective[word] = np.array([5.0, 5.0, 5.0])

# Process the textual contents
# (the entity patterns below were mangled into no-op substitutions by HTML
# decoding; "&gt;", "&lt;" and "&amp;" are the presumed originals)
textdata = ""
file1 = open(sys.argv[1] + ".revised.txt", 'r')
with file1 as myfile:
    textdata = re.sub("&gt;", ">",
                      re.sub("&lt;", "<",
                             re.sub("&amp;", "&",
                                    re.sub(" +", "\n\n",
                                           re.sub("\t", " ",
                                                  re.sub("\r", "", "".join(myfile.readlines())))))))
corenlp = StanfordCoreNLP()
file2 = open(sys.argv[1] + ".annotated.tsv", 'w')
file3 = open(sys.argv[1] + ".annotated.xml", 'w')
tokenizer = Tokenizer(nb_words=max_features, filters=keras.preprocessing.text.base_filter(),
                      lower=True, split=" ")
tokenizer.fit_on_texts(train_texts)
train_sequences = sequence.pad_sequences(tokenizer.texts_to_sequences(train_texts),
                                         maxlen=max_sent_len)
test_sequences = sequence.pad_sequences(tokenizer.texts_to_sequences(test_texts),
                                        maxlen=max_sent_len)
train_matrix = tokenizer.texts_to_matrix(train_texts)
test_matrix = tokenizer.texts_to_matrix(test_texts)
embedding_weights = np.zeros((max_features, embeddings_dim))
for word, index in tokenizer.word_index.items():
    if index < max_features:
        try:
            embedding_weights[index, :] = embeddings[word]
        except:
            embedding_weights[index, :] = np.random.rand(1, embeddings_dim)

print("")
print("Method = Linear ridge regression with bag-of-words features")
model = KernelRidge(kernel='linear')
model.fit(train_matrix, train_labels)
results = model.predict(test_matrix)
if not is_geocoding:
    print("RMSE = " + repr(np.sqrt(mean_squared_error(test_labels, results))))
    print("MAE = " + repr(mean_absolute_error(test_labels, results)))
else:
    print("Mean error = " + repr(np.mean([geodistance(results[i], test_labels[i])
                                          for i in range(results.shape[0])])))
    print("Median error = " + repr(np.median([geodistance(results[i], test_labels[i])
                                              for i in range(results.shape[0])])))

print("")
print("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1], init='uniform', activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(embeddings_dim, activation='relu'))
# MAE for SGD: 9.04117895779
# MSE for SGD 292.104437304
# R2 for SGD 0.954873464267

#### Develop models using the various tuned algorithms above
lr = LinearRegression()
lr.fit(x_train, y_train)
y_predicted = lr.predict(x_test)

svr = SVR(C=10, gamma=1, kernel='linear')
svr.fit(x_train_scaled, y_train)
y2 = svr.predict(x_test_scaled)

kr = KernelRidge(alpha=0.0001, coef0=1, degree=1, gamma=0.001, kernel='rbf',
                 kernel_params=None)
kr.fit(x_train_scaled, y_train)
y3 = kr.predict(x_test_scaled)

lasso = Lasso(alpha=1e-09)
lasso.fit(x_train_scaled, y_train)
y4 = lasso.predict(x_test_scaled)

linear_ridge = Ridge(alpha=0.1)
linear_ridge.fit(x_train_scaled, y_train)
y5 = linear_ridge.predict(x_test_scaled)

bayesian_ridge = BayesianRidge(alpha_1=1e-05, alpha_2=10, lambda_1=10, lambda_2=1e-05)
bayesian_ridge.fit(x_train_scaled, y_train)
y6 = bayesian_ridge.predict(x_test_scaled)

sgd = SGDRegressor(alpha=0.1, epsilon=0.001, l1_ratio=0.2, loss='squared_loss',
                   penalty='none', power_t=0.2)
n_alphas = 50
alphas = np.logspace(-1, 8, n_alphas)
ridge = Ridge(fit_intercept=True)
kernel_ridge = KernelRidge(kernel='poly', gamma=1, degree=3, coef0=1)

test_scores_ridge = []
test_scores_kernel = []
for alpha in alphas:
    ridge.set_params(alpha=alpha)
    ridge.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(ridge, X_test_sc, y_test_sc)
    test_scores_ridge.append(test_mse)

    kernel_ridge.set_params(alpha=alpha)
    kernel_ridge.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(kernel_ridge, X_test_sc, y_test_sc)
    test_scores_kernel.append(test_mse)

poly = PolynomialNetworkRegressor(degree=3, n_components=2, tol=1e-3,
                                  warm_start=True, random_state=0)
test_scores_poly = []
for alpha in alphas:
    poly.set_params(beta=alpha)
    poly.fit(X_train_sc, y_train_sc)
    test_mse = mean_squared_error_scorer(poly, X_test_sc, y_test_sc)
    test_scores_poly.append(test_mse)
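# Side note on the comparison above: with a linear kernel, KernelRidge solves
# the same problem as Ridge(fit_intercept=False) in dual form; a quick check:
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X, y = rng.randn(50, 4), rng.randn(50)
kr = KernelRidge(kernel='linear', alpha=1.0).fit(X, y)
lr = Ridge(alpha=1.0, fit_intercept=False).fit(X, y)
print(np.allclose(kr.predict(X), lr.predict(X)))  # True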
#############################################################################
# Fit regression model
train_size = 18630
C = 3e6
gamma = 0.01
svr = SVR(kernel='rbf', C=C, gamma=gamma)

alpha = 0.23
gamma1 = 0.01
kr = KernelRidge(kernel='rbf', gamma=gamma1, alpha=alpha)

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0

t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0

t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0

xk = np.arange(18630 + 1440)[:, None]

#############################################################################
# look at the results
err1 = np.abs(svr.predict(X) - z) / z
err2 = np.abs(kr.predict(X) - z) / z
#### KERNEL RIDGE REGRESSION
alphaVec = [0.1, 0.01]
sigmaVec = np.arange(5.0, 5.5, 0.5)

if len(alphaVec) > 1 or len(sigmaVec) > 1:
    # Grid search of parameters
    param_grid = {"alpha": alphaVec,
                  "kernel": [RBF(length_scale) for length_scale in sigmaVec]}
    kr = KernelRidge()
    kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
else:
    # Run with pre-defined parameter set
    kr = KernelRidge(alpha=alphaVec[0], kernel='rbf', gamma=sigmaVec[0])

# Fit model
kr.fit(predictor.reshape(-1, 1), predictand.reshape(-1, 1))

# Get best parameters (only defined when the grid-search branch was taken)
bestAlpha_kr = kr.best_params_['alpha']
bestSigma_kr = kr.best_params_['kernel'].length_scale

# Predict over grid
kr_fit = kr.predict(predictor_grid.reshape(-1, 1))

# Compute derivatives of prediction
kr_der1 = np.gradient(kr_fit[:, 0])
kr_der2 = np.gradient(kr_der1)

# Estimate decorrelation time KR
if bestSigma_kr >= 2:
    minDer1 = 0.005  # 0.001
px = []
py = []
with open('/home/redwards/Desktop/genus_species_analysis/pseudo_coverage.txt', 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        px.append(float(p[0]))
        py.append(float(p[1]))

ny = np.array(y)
nx = np.array(x)
pnx = np.array(px)
pny = np.array(py)

kr = KernelRidge(kernel='rbf', gamma=7.5e-5, alpha=0.001)

kr.fit(nx[:, None], ny[:, None])
x_pred = np.linspace(min(x), max(x), 10000)[:, None]
y_pred = kr.predict(x_pred)

kr.fit(pnx[:, None], pny[:, None])
px_pred = np.linspace(min(px), max(px), 10000)[:, None]
py_pred = kr.predict(px_pred)

fig = plt.figure()
ax = fig.add_subplot(111)

"""
These regions come from http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2562909/
test_matrix1 = preprocessing.scale(test_matrix1)

data2 = [([float(row[i]) for i in range(len(row) - 2)],
          (float(row[len(row) - 2]), float(row[len(row) - 1])))
         for row in csv.reader(open("default_plus_chromatic_features_1059_tracks.txt"),
                               delimiter=',', quoting=csv.QUOTE_NONE)]
np.random.seed(0)
np.random.shuffle(data2)
train_size2 = int(len(data2) * percent)
train_matrix2 = np.array([features for (features, label) in data2[0:train_size2]])
test_matrix2 = np.array([features for (features, label) in data2[train_size2:-1]])
train_labels2 = [label for (features, label) in data2[0:train_size2]]
test_labels2 = [label for (features, label) in data2[train_size2:-1]]
train_matrix2 = preprocessing.scale(train_matrix2)
test_matrix2 = preprocessing.scale(test_matrix2)

print("")
print("Method = Linear ridge regression - Default features")
model = KernelRidge(kernel='linear')
model.fit(train_matrix1, train_labels1)
results = model.predict(test_matrix1)
print("Mean error = " + repr(np.mean([geodistance(results[i], test_labels1[i])
                                      for i in range(results.shape[0])])))
print("Median error = " + repr(np.median([geodistance(results[i], test_labels1[i])
                                          for i in range(results.shape[0])])))

print("Method = Linear ridge regression - Default features + chromatic features")
model = KernelRidge(kernel='linear')
model.fit(train_matrix2, train_labels2)
results = model.predict(test_matrix2)
print("Mean error = " + repr(np.mean([geodistance(results[i], test_labels2[i])
                                      for i in range(results.shape[0])])))
print("Median error = " + repr(np.median([geodistance(results[i], test_labels2[i])
                                          for i in range(results.shape[0])])))

print("")
print("Method = Random forest regression - Default features")
model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(train_matrix1, train_labels1)
results = model.predict(test_matrix1)
class RidgeMKL:
    """A MKL model in a transductive setting (test points are presented at training time)."""

    mkls = {
        "align": Align,
        "alignf": Alignf,
        "alignfc": Alignf,
        "uniform": UniformAlignment,
    }
    mkls_low_rank = {
        "align": AlignLowRank,
        "alignf": AlignfLowRank,
        "alignfc": AlignfLowRank,
        "uniform": UniformAlignmentLowRank,
    }

    # alignf expects kernels to be centered
    centered = {"alignf", "alignfc"}
    supervised = {"align", "alignf", "alignfc"}

    def __init__(self, lbd=0, method="align", method_init_args={}, low_rank=False):
        """
        :param method: (``string``) "align", "alignf", or "uniform", MKL method to be used.

        :param low_rank: (``bool``) Use low-rank approximations.

        :param method_init_args: (``dict``) Initialization arguments for the MKL methods.

        :param lbd: (``float``) L2-regularization.
        """
        self.method = method
        if not low_rank:
            self.mkl_model = self.mkls[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model = self.mkls[method](**init_args)
        else:
            self.mkl_model = self.mkls_low_rank[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model = self.mkls_low_rank[method](**init_args)
        self.lbd = lbd
        self.low_rank = low_rank
        self.trained = False

    def fit(self, Ks, y, holdout=None):
        """Learn weights for kernel matrices or Kinterfaces.

        :param Ks: (``list``) of (``numpy.ndarray``) or of (``Kinterface``) to be aligned.

        :param y: (``numpy.ndarray``) Class labels :math:`y_i \in {-1, 1}` or regression targets.

        :param holdout: (``list``) List of indices to exclude from alignment.
        """
        # Expand kernel interfaces to kernel matrices
        expand = lambda K: K[:, :] if isinstance(K, Kinterface) else K
        Hs = map(expand, Ks)

        # Assert correct dimensions
        assert Ks[0].shape[0] == len(y)

        # Fit MKL model
        if self.method in self.supervised:
            self.mkl_model.fit(Hs, y, holdout=holdout)
        else:
            self.mkl_model.fit(Hs)

        if self.low_rank:
            self.X = hstack(map(lambda e: sqrt(e[0]) * e[1],
                                zip(self.mkl_model.mu, Hs)))
            if self.method in self.centered:
                self.X = center_kernel_low_rank(self.X)
                self.X[where(isnan(self.X))] = 0
            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd, kernel="linear")
            # Fit ridge on the examples minus the holdout set
            inxs = list(set(range(Hs[0].shape[0])) - set(holdout))
            self.ridge.fit(self.X[inxs], y[inxs])
            self.trained = True
        else:
            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd, kernel=self.mkl_model)
            # Fit ridge on the examples minus the holdout set
            inxs = array(list(set(range(Hs[0].shape[0])) - set(holdout)))
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            self.ridge.fit(inxs, y[inxs])
            self.trained = True

    def predict(self, inxs):
        """Predict values for data on indices inxs (transductive setting).

        :param inxs: (``list``) Indices of samples to be used for prediction.

        :return: (``numpy.ndarray``) Vector of prediction of regression targets.
        """
        assert self.trained
        if self.low_rank:
            return self.ridge.predict(self.X[inxs])
        else:
            inxs = array(inxs)
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            return self.ridge.predict(inxs).ravel()
diff_fano = diff_fano[~np.isnan(diff_fano)]
pna.cal_CohenD(diff_fano)

""" temp_script """
x, y = data_tuning_mean[:, :, -10:-1].ravel(), data_tuning_std[:, :, -10:-1].ravel()
kr = KernelRidge()
kr.fit(x, y)
kr = kernel_regression.KernelReg(y, x, ['c'], bw=[np.std(x) / 5])
plt.plot(x, y, '.')
plt.plot(x, kr.fit(x)[0], 'o')

for i in range(data_tuning_mean.shape[0]):
    plot_kr(data_tuning_mean[i, :, -3].ravel(), data_tuning_std[i, :, -3].ravel(),
            color=colors[i], linestyle=linestyles[i])

""" legacy code """
data_neuro_cur = signal_align.select_signal(data_neuro_spk, chan_filter=range(0, 32),
                                            sortcode_filter=range(1, 4))
data_neuro_cur = signal_align.select_signal(data_neuro_spk, chan_filter=range(33, 48),
                                            sortcode_filter=range(1, 4))
plt.figure()
for i in range(data_neuro_cur['data'].shape[2]):
def parametrize_environment_specific(settings, rerun):
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Parametrizing" << channel_name << "model" << log.endl
    soap_types = SETTINGS["soap_types"]
    log << "Particle SOAP types are" << ", ".join(soap_types) << log.endl
    # PATHS - for example:
    # { "xyz_file": "data_esol/structures.xyz",
    #   "soap_file": "data_esol/structures.soap",
    #   "kmat_file": "data_esol/kernel.npy",
    #   "targets_file": "data_esol/targets.npy",
    #   "range_file": "data_esol/range.json",
    #   "weights_file": "data_esol/weights.npy" }
    paths = copy.deepcopy(settings["paths"])
    for p, v in paths.iteritems():
        paths[p] = os.path.join(PATH, v)
        log << "Path to %s = %s" % (p, paths[p]) << log.endl
    configs = soap.tools.io.read(paths["xyz_file"])
    # SOAP
    soap_options = SETTINGS["soap_options"][settings["soap_options_ref"]]
    if rerun or not os.path.isfile(paths["soap_file"]):
        log << "Make target: %s" % paths["soap_file"] << log.endl
        soap_configure_default(types=soap_types)
        dset = soap_evaluate(configs, soap_options, paths["soap_file"])
    else:
        log << "Load target: %s" % paths["soap_file"] << log.endl
        dset = soap.DMapMatrixSet(paths["soap_file"])
    # KERNEL
    kernel_options = settings["kernel_options"]
    if rerun or not os.path.isfile(paths["kmat_file"]):
        log << "Make target: %s" % paths["kmat_file"] << log.endl
        K = kernel_evaluate(dset, kernel_options, paths["kmat_file"])
    else:
        log << "Load target: %s" % paths["kmat_file"] << log.endl
        K = np.load(paths["kmat_file"])
    # TARGETS
    target_key = settings["regression_options"]["target_key"]
    if rerun or not os.path.isfile(paths["targets_file"]):
        log << "Make target: %s" % paths["targets_file"] << log.endl
        targets = np.array([float(c.info[target_key]) for c in configs])
        np.save(paths["targets_file"], targets)
    else:
        log << "Load target: %s" % paths["targets_file"] << log.endl
        targets = np.load(paths["targets_file"])
    # MODEL
    regr_options = settings["regression_options"]
    if rerun or not os.path.isfile(paths["weights_file"]):
        log << "Make target: %s" % paths["weights_file"] << log.endl
        y_avg = np.average(targets)
        krr = KernelRidge(alpha=regr_options["lreg"], kernel='precomputed')
        krr.fit(K**regr_options["xi"], targets)
        y_predict = krr.predict(K**regr_options["xi"])
        kweights = krr.dual_coef_
        np.save(paths["weights_file"], kweights)
        np.save(paths["pred_file"], y_predict)
    else:
        log << "Load target: %s" % paths["weights_file"] << log.endl
        kweights = np.load(paths["weights_file"])
        y_predict = np.load(paths["pred_file"])
    if rerun or not os.path.isfile(paths["range_file"]):
        dset_attr = soap.DMapMatrixSet(paths["soap_file"])
        delta_Ys = kernel_attribute(dset_attr, dset, kernel_options, kweights,
                                    regr_options["xi"])
        json.dump(delta_Ys, open(paths["range_file"], "w"))
    else:
        delta_Ys = json.load(open(paths["range_file"]))
#from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
import numpy as np

n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
print(y)
print()
X = np.random.randn(n_samples, n_features)
print(X)
#clf = SVR(C=1.0, epsilon=0.2)
clf = KernelRidge(alpha=1.0)
clf.fit(X, y)
print(y[1])
# predict expects a 2-D array, hence the reshape of the single sample
print(clf.predict(X[1].reshape(1, -1)))
#####################################################################
# --- RUN THE MODEL: FOR A GIVEN SPLIT AND EACH PARAMETER TRIAL --- #
#####################################################################

# For each parameter trial
for i in range(trials):

    # For regression use the Kernel Ridge method
    if model_type == "regression":
        print("\n Starting experiment for trial %d and parameter alpha = %3f\n" % (i, alpha_grid[i]))

        # Fit the kernel ridge model
        KR = KernelRidge(kernel='precomputed', alpha=alpha_grid[i])
        KR.fit(K_train, y_train)

        # predict on the validation and test set
        y_pred = KR.predict(K_val)
        y_pred_test = KR.predict(K_test)

        # adjust prediction: needed because the training targets have been normalized
        y_pred = y_pred * float(y_train_std) + y_train_mean
        y_pred_test = y_pred_test * float(y_train_std) + y_train_mean

        # root mean squared error on validation
        rmse = np.sqrt(mean_squared_error(y_val, y_pred))
        perf_all_val.append(rmse)

        # root mean squared error on test
        rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
        sum = 0.
        for i in range(0, len(X_train)):
            sum += alpha[i] * kernel(X_train[i], x)
        return sum
    return f


def score(f, X_test, y_test):
    error = 0.
    for i in range(0, len(X_test)):
        prediction = f(X_test[i])
        if isinstance(prediction, np.ndarray):
            prediction = prediction[0]
        error += pow((prediction - y_test[i]), 2)
    return error / len(X_test)


# Make up data
X, y, true_coefficient = make_regression(n_samples=80, n_features=30,
                                         n_informative=20, noise=10,
                                         coef=True, random_state=20140210)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

# Run Scikit Kernel Ridge Regression
clf = KernelRidge()
clf.fit(X_train, y_train)
print('SCIKIT: mean square test error:', score(clf.predict, X_test, y_test))

# Run this implementation
f = kernel_ridge_regression(X_train, y_train, 1)
score_val = score(f, X_test, y_test)
print('Custom: mean square test error:', score_val)
# Choose the number of predicted peptides and their length
n_predictions = 1000
y_length = 5

# Max time (seconds) for the branch and bound search
max_time = 500

print('String maximization model on BPPs dataset')
gs_kernel = GenericStringKernel(AminoAcidFile.blosum62_natural, sigma_position,
                                sigma_amino_acid, n, is_normalized=True)
alphabet = gs_kernel.alphabet
dataset = load_bpps_dataset()

# Use a regression algorithm to learn the weights first
print('Learning the regression weights ...')
learner = KernelRidge(alpha, kernel='precomputed')
gram_matrix = gs_kernel(dataset.X, dataset.X)
learner.fit(gram_matrix, dataset.y)
learned_weights = learner.dual_coef_

# We can then use the string maximization model with the learned weights
print('Branch and bound search for the top {} peptides of length {} ...'.format(n_predictions, y_length))
model = StringMaximizationModel(alphabet, n, gs_kernel, max_time)
model.fit(dataset.X, learned_weights, y_length)
peptides, bioactivities = model.predict(n_predictions)

print('\n')
print('Peptides | Predicted bioactivities')
for peptide, bioactivity in zip(peptides, bioactivities):
    print(peptide, bioactivity)
class Learner():
    path = 'matrices/'
    inputF = 'inputs.npy'
    stateF = 'states.npy'
    itrF = 'itr.npy'
    inptFile = os.path.join(path, inputF)
    stateFile = os.path.join(path, stateF)
    itrFile = os.path.join(path, itrF)
    itr = np.array([])
    useSHIV = False
    THRESH = 0.45
    ahqp_solver_g = AHQP(sigma=6)
    ahqp_solver_b = AHQP(sigma=5, nu=1e-3)

    def trainModel(self, s=None, a=None):
        """
        Trains model on given states and actions.
        Uses neural net or SVM based on global settings.
        """
        states, actions = self.states[3:], self.actions[3:]
        #print("states.shape", states.shape)
        #print("actions.shape", actions.shape)
        if len(self.itr) == 0:
            self.itr = np.array([states.shape[0]])
        else:
            self.itr = np.hstack((self.itr, states.shape[0]))

        '''if states.shape[0] > 2700.0:
            f = os.path.join(self.path, 'statesToValidate.npy')
            np.save(f, states)
            IPython.embed()'''

        fits = []
        #actions = actions.ravel()
        self.clf = KernelRidge(alpha=1.0)
        self.clf.kernel = 'rbf'
        print("SIZE: ", states.shape)
        self.clf.fit(states, actions)
        #IPython.embed()
        actions_pred = self.clf.predict(states)
        bad_state = np.zeros(actions_pred.shape[0])
        for i in range(actions_pred.shape[0]):
            fit = LA.norm(actions_pred[i, :] - actions[i, :])
            fits.append(fit)
        med = np.median(np.array(fits))
        # NOTE: the original looped `for fit in fits:` with a stale index `i`,
        # so only the last state could ever be flagged; enumerate fixes that.
        for i, fit in enumerate(fits):
            if fit > med:
                bad_state[i] = 1
        IPython.embed()

        if self.useSHIV:
            self.labels = np.zeros(states.shape[0]) + 1.0
            self.scaler = preprocessing.StandardScaler().fit(states)
            states_proc = self.scaler.transform(states)
            good_labels = bad_state == 0.0
            states_g = states_proc[good_labels, :]
            bad_labels = bad_state == 1.0
            states_b = states_proc[bad_labels, :]
            #IPython.embed()
            self.ahqp_solver_g.assembleKernel(states_g, np.zeros(states_g.shape[0]) + 1.0)
            self.ahqp_solver_b.assembleKernel(states_b, np.zeros(states_b.shape[0]) + 1.0)
            #IPython.embed()
            self.ahqp_solver_g.solveQP()
            self.ahqp_solver_b.solveQP()
        #score = self.clf.score(states, actions)
        #print(score)
        self.plot(fits, states, med)

    def askForHelp(self, state):
        if self.useSHIV:
            state = self.scaler.transform(state)
            if self.ahqp_solver_b.predict(state) == 1.0:
                return -1.0
            else:
                return self.ahqp_solver_g.predict(state)
        else:
            return -1

    def plot(self, fits, states, threshold):
        index = range(len(states))
        t = np.ones(len(index)) * threshold
        plt.figure(1)
        plt.plot(index, fits, color='b', linewidth=4.0)
        plt.plot(index, t, color='r', linewidth=4.0)
        plt.ylabel('Fit')
        plt.xlabel('Index of State')
        plt.show()

    def getAction(self, state):
        """
        Returns a prediction given the input state.
        Uses neural net or SVM based on global settings.
        """
        return self.clf.predict(state)

    def initModel(self, useSHIV):
        self.useSHIV = useSHIV
        try:
            self.states = np.load(self.stateFile)
            self.actions = np.load(self.inptFile)
        except IOError:
            self.states = np.array([-8, 8.75, 0, -12, 22, 0, -15, 21.13043404,
                                    0, -12, 18.52173996, 0, -15, 14.173913,
                                    0, -12, 8.08695698, 0, 0, 0, 0, 0])
            self.actions = np.array([0, 0, 0, 0])
        #self.trainModel(self.states, self.actions)

    def updateModel(self, s, a):
        self.states = np.vstack((self.states, s))
        self.actions = np.vstack((self.actions, a))
        #self.trainModel(self.states, self.actions)

    def saveModel(self):
        path = 'matrices/oldData/'
        currT = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        inptFileOut = os.path.join(path, 'inputs' + currT + '.npy')
        stateFileOut = os.path.join(path, 'states' + currT + '.npy')
        np.save(stateFileOut, self.states)
        np.save(inptFileOut, self.actions)
        np.save(self.itrFile, self.itr)
Xh_tr[:, k] = (Xh_tr[:, k] - mea_h[k]) / sig_h[k]

############## Kernel Ridge Regression ########################################
from sklearn.kernel_ridge import KernelRidge
import scipy.io as sio

mf = sio.loadmat(
    "/data/ISOTROPIC/regression/KRR_rbf_cv_alpha_gamma_sspacing4_tspacing6.mat",
    squeeze_me=True, struct_as_record=False)
KRR_alpha_opt = mf["KRR_alpha_opt"]
print("Optimal alpha:", KRR_alpha_opt)
KRR_gamma_opt = mf["KRR_gamma_opt"]
print("Optimal gamma:", KRR_gamma_opt)

kr = KernelRidge(kernel="rbf", alpha=KRR_alpha_opt, gamma=KRR_gamma_opt)
kr.fit(Xl_tr, Xh_tr)

############## Prediction and save to file ####################################
import os
try:
    os.remove("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc")
except OSError:
    pass

ncfile2 = Dataset("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc", "w")
ncfile1 = Dataset("/data/ISOTROPIC/data/data_downsampled4.nc", "r")

# create the dimensions
ncfile2.createDimension("Nt", Nt)
from molml.kernel import AtomKernel

from utils import load_qm7


if __name__ == "__main__":
    # This is just boilerplate code to load the data
    Xin_train, Xin_test, y_train, y_test = load_qm7()

    # Look at just a few examples to be quick
    n_train = 200
    n_test = 200
    Xin_train = Xin_train[:n_train]
    y_train = y_train[:n_train]
    Xin_test = Xin_test[:n_test]
    y_test = y_test[:n_test]

    gamma = 1e-7
    alpha = 1e-7

    kern = AtomKernel(gamma=gamma, transformer=LocalEncodedBond(n_jobs=-1), n_jobs=-1)
    K_train = kern.fit_transform(Xin_train)
    K_test = kern.transform(Xin_test)

    clf = KernelRidge(alpha=alpha, kernel="precomputed")
    clf.fit(K_train, y_train)
    train_error = MAE(clf.predict(K_train), y_train)
    test_error = MAE(clf.predict(K_test), y_test)
    print("Train MAE: %.4f Test MAE: %.4f" % (train_error, test_error))
    print()
if __name__ == "__main__":
    # trains Kronecker RLS for different sample sizes,
    # comparing CPU time and verifying that the learned
    # dual coefficients are the same for both methods
    regparam = 1.0
    for size in [10, 20, 40, 60, 80, 100, 500, 1000, 2000, 4000, 6000]:
        X1, X2, y = random_data(size, 100)
        kernel1 = GaussianKernel(X1, gamma=0.01)
        K1 = kernel1.getKM(X1)
        kernel2 = GaussianKernel(X2, gamma=0.01)
        K2 = kernel2.getKM(X2)
        start = time.clock()
        rls = KronRLS(K1=K1, K2=K2, Y=y, regparam=regparam)
        dur = time.clock() - start
        print("RLScore pairs: %d, CPU time: %f" % (size**2, dur))
        # forming the full Kronecker product kernel matrix quickly
        # becomes unfeasible
        if size <= 100:
            K = np.kron(K2, K1)
            start = time.clock()
            ridge = KernelRidge(alpha=regparam, kernel="precomputed")
            ridge.fit(K, y)
            dur = time.clock() - start
            print("sklearn pairs: %d, CPU time: %f" % (size**2, dur))
            sklearn_coef = ridge.dual_coef_
            core_coef = rls.predictor.A.reshape(K1.shape[0], K2.shape[0]).T.ravel()
            print("Are the coefficients same: %r" % np.allclose(sklearn_coef, core_coef))
        else:
            print("sklearn: too much data")
        print("*****")