def __init__(self, params): super(%CLASS%, self).__init__(params) tmp = LinearSVR() params = tmp.get_params() for key in params: self.create_new_input(type_="data", label=key, widget_name="std line edit m", widget_pos="besides", pos=-1) del tmp
def __init__(self, params):
    """Create one data output per LinearSVR hyper-parameter, plus a final
    combined "param dict" output."""
    super(LinearSVRGetParams_NodeInstance, self).__init__(params)

    # Probe a default estimator purely to enumerate its parameter names.
    probe = LinearSVR()
    params = probe.get_params()
    for key in params:
        self.create_new_output(type_="data", label=key, pos=-1)
    del probe
    self.create_new_output(type_="data", label="param dict", pos=-1)
class LinearSVRPermuteCoef:
    """Wrapper around ``LinearSVR`` that records the extreme coefficient
    values of every fit.

    Each call to :meth:`fit` appends the max and min of the learned
    ``coef_`` vector to the module-level ``coeffs_state`` dict (keys
    ``'max'`` and ``'min'``), which the static ``permute_*`` helpers expose.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded untouched to LinearSVR.
        self.model = LinearSVR(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model, mirror its learned attributes, and log
        the min/max coefficient of this fit into ``coeffs_state``."""
        self.model.fit(X, y)
        self.coef_ = self.model.coef_
        self.intercept_ = self.model.intercept_
        coeffs_state['max'].append(np.max(self.coef_))
        coeffs_state['min'].append(np.min(self.coef_))
        return self

    def get_params(self, deep=True):
        return self.model.get_params(deep)

    def set_params(self, **kwargs):
        self.model.set_params(**kwargs)
        return self

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y, sample_weight=None):
        # Only forward sample_weight when it was actually supplied.
        if sample_weight is None:
            return self.model.score(X, y)
        return self.model.score(X, y, sample_weight)

    @staticmethod
    def permute_min_coefs():
        return coeffs_state['min']

    @staticmethod
    def permute_max_coefs():
        return coeffs_state['max']

    @staticmethod
    def reset_perm_coefs():
        coeffs_state['min'] = []
        coeffs_state['max'] = []
# NOTE(review): whitespace-mangled fragment. It begins with the ``else:`` arm
# of a train/test split whose ``if`` (and enclosing loop) lie outside this
# view, then fits a LinearSVR(epsilon=1.0), scores it, predicts each test row
# one at a time, reports MSE, and draws the decision regions with mlxtend.
# Kept byte-identical: reformatting would require guessing the lost
# indentation of the truncated conditional.
else: test_set.append([ predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][13] ]) test_result.append([predictiveAttributeNotDegree[i][2]]) svm_reg = LinearSVR(epsilon=1.0, max_iter=10000000) train_result = np.array(train_result) svm_reg.fit(train_set, train_result.ravel()) print(svm_reg.score(test_set, test_result)) prediction = [] for item in test_set: items = [[item[0], item[1]]] prediction.append(svm_reg.predict(items)) pred = np.zeros(len(prediction)) predi = np.array(prediction) for i in range(len(prediction)): pred[i] = predi[i][0] print(("MSE: {}".format(mean_squared_error(pred, test_result)))) print("Params: ", svm_reg.get_params()) test_set = np.array(test_set) test_result = np.array(test_result) from mlxtend.plotting import plot_decision_regions import matplotlib.pyplot as plt plot_decision_regions(test_set, test_result.reshape(-1), clf=svm_reg, legend=2) plt.title('SVM Decision Region Boundary', size=16) plt.show()
# NOTE(review): whitespace-mangled fragment. The first statement references a
# loop variable ``i`` from a loop that lies outside this view, so the original
# nesting cannot be recovered; kept byte-identical. It builds an 80/20
# train/test split over ten attributes (indices listed in the inline comment),
# fits ``svm_reg_tot``, scores it, and evaluates one hand-built student row.
test_result_tot.append([predictiveAttributeDegree[i][2]]) train_percent = (len(predictiveAttributeNotDegree)/100)*80 count = 0 for i in range(len(predictiveAttributeNotDegree)): if count < train_percent: count = count + 1 train_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1], predictiveAttributeNotDegree[i][6], predictiveAttributeNotDegree[i][7], predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10], predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],predictiveAttributeNotDegree[i][17], predictiveAttributeNotDegree[i][18]]) train_result_tot.append([predictiveAttributeNotDegree[i][2]]) else: test_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1], predictiveAttributeNotDegree[i][6], predictiveAttributeNotDegree[i][7], predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10], predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],predictiveAttributeNotDegree[i][17], predictiveAttributeNotDegree[i][18]]) test_result_tot.append([predictiveAttributeNotDegree[i][2]]) train_result_tot = np.array(train_result_tot) svm_reg_tot.fit(train_set_tot, train_result_tot.ravel()) print("----ALL ATTRIBUTE: score: ", svm_reg_tot.score(test_set_tot, test_result_tot)) # 0. matr 1.cf 6.tipoCds 7.coorte 9.annodiploma 10.votodip 11.codschool 12.tipoMat 17.mot_sta 18.sta newStudent = [[2933, 2928, 1, 2015, 2015, 100, 200, 9, 3, 10]] real_value = [30] predicted = svm_reg_tot.predict(newStudent) print("----ALL ATTRIBUTE: Predicted: ", predicted) print("----ALL ATTRIBUTE: MSE: ", mean_squared_error(real_value, svm_reg_tot.predict(newStudent))) print("----ALL ATTRIBUTE: Params: ", svm_reg_tot.get_params())
import pickle

from sklearn.svm import LinearSVR as SVR
from dim_utils import *

# Train a linear SVR on the pre-generated dataset and pickle the model.
# NOTE(review): ``np`` is presumably re-exported by the ``dim_utils``
# wildcard import — confirm.
npzfile = np.load('large_data.npz')
model_name = 'model.pickle'

X = npzfile['X']
y = npzfile['y']
# The values are already normalised in gen.py:
# X /= X.max(axis=0, keepdims=True)

svr = SVR(C=1)
svr.fit(X, y)

with open(model_name, 'wb') as pickle_file:
    pickle.dump(svr, pickle_file)

print(svr.get_params())  # to save the parameters
# svr.set_params() to restore the parameters

# predict
class TextRegressor:
    """Text regression pipeline: a tf-idf vectorizer feeding a linear
    regressor (ridge, SVR, or linear SVR).

    All hyper-parameters — the DocAnalyzer n-gram settings, the vectorizer
    options, and the regressor options — are driven through one flat ``par``
    dict so a generic search loop can tune the whole pipeline via
    :meth:`set_params` / :meth:`get_params`.
    """

    # Defaults for the flat parameter dict.  Keys that also exist on the
    # vectorizer or regressor are forwarded to them in set_params().
    param_defaults = {'min_df': 1,
                      'c_ngmin': 1, 'c_ngmax': 1,
                      'w_ngmax': 1, 'w_ngmin': 1,
                      'lowercase': 'word',
                      'alpha': 1.0, 'C': 1.0,
                      'mix': 1.0}

    def __init__(self, regressor='ridge', vectorizer='tf-idf'):
        """Build the chosen regressor/vectorizer pair and snapshot their
        default parameters for later resets."""
        if regressor == 'ridge':
            from sklearn.linear_model import Ridge
            self.reg = Ridge()
        elif regressor == 'SVR':
            from sklearn.svm import SVR
            self.reg = SVR()
        elif regressor == 'linearsvr':
            from sklearn.svm import LinearSVR
            self.reg = LinearSVR()
        if vectorizer == 'tf-idf':
            from sklearn.feature_extraction.text import TfidfVectorizer
            self.vec = TfidfVectorizer()
        self.vec_params_default = self.vec.get_params()
        self.reg_params_default = self.reg.get_params()
        self._reset()

    def _reset(self):
        """Restore all parameters (pipeline, vectorizer, regressor) to
        their recorded defaults."""
        self.par = dict(self.param_defaults)
        # BUG FIX: copy the default dicts instead of aliasing them.
        # Previously ``self.vec_params = self.vec_params_default`` made both
        # names refer to the *same* dict, so the .update() calls in
        # set_params() silently corrupted the stored defaults and _reset()
        # no longer reset anything.  Same for reg_params.
        self.vec_params = dict(self.vec_params_default)
        self.vec.set_params(**self.vec_params)
        self.reg_params = dict(self.reg_params_default)
        self.reg.set_params(**self.reg_params)

    def set_params(self, **params):
        """Reset to defaults, overlay ``params``, and push the relevant
        subsets down to the analyzer, vectorizer, and regressor."""
        self._reset()
        self.par.update(params)
        ngram_analyzer = DocAnalyzer(
            lowercase=self.par.get('lowercase'),
            c_ngmin=self.par.get('c_ngmin'),
            c_ngmax=self.par.get('c_ngmax'),
            w_ngmin=self.par.get('w_ngmin'),
            w_ngmax=self.par.get('w_ngmax'))
        # Forward only the keys the vectorizer actually accepts.
        self.vec_params.update(
            {k: self.par[k]
             for k in self.par.keys() & self.vec_params.keys()})
        self.vec.set_params(**self.vec_params)
        self.vec.set_params(analyzer=ngram_analyzer)
        # Likewise for the regressor (e.g. 'alpha' for ridge, 'C' for SVR).
        self.reg_params.update(
            {k: self.par[k]
             for k in self.par.keys() & self.reg_params.keys()})
        self.reg.set_params(**self.reg_params)

    def get_params(self):
        """Return the flat parameter dict (not a copy)."""
        return self.par

    def fit(self, text, outcome):
        """Vectorize ``text`` and fit the regressor against ``outcome``.

        ``text`` may be either a sequence of documents or a 2-tuple
        ``(documents, numeric_features)``; numeric features are scaled by
        the 'mix' parameter and stacked onto the sparse text matrix.
        NOTE(review): the tuple case is detected with ``len(text) == 2``,
        which would misfire on a plain two-document corpus — confirm callers
        never pass exactly two documents.
        """
        num = None
        if len(text) == 2:
            text, num = text
        x = self.vec.fit_transform(text)
        if num is not None:
            x = hstack((x, self.par['mix'] * num), format='csr')
        self.reg.fit(x, outcome)

    def predict(self, text, gold=None, gold_rank=None, rank_dir=-1,
                return_score=False):
        """Predict outcomes for ``text`` (same format as in :meth:`fit`).

        With ``return_score=True`` also return the score dict computed
        against ``gold`` / ``gold_rank``.
        """
        num = None
        if len(text) == 2:
            text, num = text
        x = self.vec.transform(text)
        if num is not None:
            x = hstack((x, self.par['mix'] * num), format='csr')
        pred = self.reg.predict(x)
        if return_score:
            return pred, self._score(gold, pred, gold_rank, rank_dir)
        else:
            return pred

    def _score(self, gold, pred, gold_rank=None, rank_dir=-1, verbose=False):
        """Compute r2, rmse, Pearson corr, and rank corr of ``pred`` vs
        ``gold``; optionally print distribution summaries."""
        r2 = r2_score(gold, pred)
        rmse = np.sqrt(mean_squared_error(gold, pred))
        if gold_rank is None:
            gold_rank = rankdata(rank_dir * gold, method='ordinal')
        pred_rank = rankdata(rank_dir * pred, method='ordinal')
        corr, _ = pearsonr(gold, pred)
        rank_corr, _ = pearsonr(gold_rank, pred_rank)
        if verbose:
            fmt = ("{}: n={}, min={:.4f}, max={:.4f}, mean={:.4f}, "
                   "var={:.4f}, skew={:.4f}, kurtosis={:.4f}")
            gold_dsc = describe(gold)
            pred_dsc = describe(pred)
            print(fmt.format('gold', gold_dsc[0], *gold_dsc[1],
                             *gold_dsc[2:]))
            print(fmt.format('pred', pred_dsc[0], *pred_dsc[1],
                             *pred_dsc[2:]))
        return {'r2': r2, 'rmse': rmse, 'rank_corr': rank_corr, 'corr': corr}

    def score(self, text, gold, gold_rank=None, rank_dir=-1, verbose=False):
        """Predict on ``text`` and score the predictions against ``gold``."""
        pred = self.predict(text)
        return self._score(gold, pred, gold_rank, rank_dir, verbose=verbose)
# PCA + LinearSVR experiment on the Year Prediction MSD sample.
# (Originally Python 2; all prints are single-argument, so the parenthesized
# form below behaves identically under Python 2 and 3.)
data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
X = data[:, 1:91]  # the 90 feature columns
y = data[:, 0]     # first column is the regression target (year)

pca = PCA(n_components=10)
pca.fit(X)
# FIX: removed a dead ``PCA(copy=True, iterated_power='auto', ...)``
# expression statement — the echoed repr of an interactive session that
# constructed and immediately discarded a second estimator.

print(pca.explained_variance_ratio_)
print(pca.components_)
# print(pca.explained_variance_)
# print(pca.mean_)
print(pca.n_components_)
# print(pca.noise_variance_)
print(pca.components_[1])

# Manual projection onto the principal axes.
# NOTE(review): this skips the mean-centering that pca.transform(X) applies,
# so the projected values differ from sklearn's by a constant offset —
# confirm this is intentional.
rowFeatureVector = pca.components_
X = np.dot(rowFeatureVector, X.transpose())
X = X.transpose()
print(len(X))
print(X)

clf = LinearSVR(C=1.0, epsilon=0, verbose=1, max_iter=1000)
clf.fit(X, y)
print(clf.predict(X))
print(y)
print(clf.score(X, y))  # training-set R^2 (no held-out split here)
print(clf.get_params(deep=True))
class Baseline: def __init__(self, city, dest_name): self.city = city self.dest_name = dest_name print 'Baseline implementation for {:s} : {:s}'.format( self.city, self.dest_name) dest_to_idx = { 'bofa': 0, 'church': 1, 'gas_station': 3, 'high_school': 3, 'mcdonalds': 4 } self.idx = dest_to_idx[self.dest_name] self.base_dir = osp.join('../data/dataset', city) self.train_label_filename = osp.join(self.base_dir, 'distance', 'train_labels.h5') self.train_im_list_filename = osp.join(self.base_dir, 'distance', 'train_im_list.txt') self.test_label_filename = osp.join(self.base_dir, 'distance', 'test_labels.h5') self.test_im_list_filename = osp.join(self.base_dir, 'distance', 'test_im_list.txt') self.svr = LinearSVR(verbose=1, epsilon=0, dual=False, tol=1e-3, max_iter=50000, loss='squared_epsilon_insensitive') self.scaler = StandardScaler(copy=False) self.model_filename = osp.join(self.base_dir, 'distance', '{:s}.pkl'.format(self.dest_name)) def collect_train_data(self): with open(self.train_im_list_filename, 'r') as train_f_im: train_im_names = [l.rstrip() for l in train_f_im] print 'Loading train data...' with h5py.File('../data/dataset/train_feats1.mat', 'r') as f: self.train_X = np.asarray(f['train_features'], dtype=np.float32).T with h5py.File(self.train_label_filename, 'r') as train_f_label: self.train_y = train_f_label['label'][:, self.idx].astype(np.float32) # select cities and remove rogue labels # idx = [i for i,n in enumerate(train_im_names) if ((('boston' in n)) and self.train_y[i] < 1e3)] idx = [ i for i, n in enumerate(train_im_names) if self.train_y[i] < 1e3 ] self.train_X = self.train_X[idx, :] self.train_y = self.train_y[idx] assert (self.train_y.shape[0] == self.train_X.shape[0]) print 'Done, using {:d} images for training'.format( self.train_X.shape[0]) def train(self, C=1.0): print 'Scaling...' 
self.train_X = self.scaler.fit_transform(self.train_X) print 'Training with C = {:f}'.format(C) p = self.svr.get_params() p['C'] = C self.svr.set_params(**p) self.svr.fit(self.train_X, self.train_y) def save_predictions(self): with h5py.File('../data/dataset/test_feats.mat', 'r') as f: print 'Loading feats...' self.test_X = np.asarray(f['test_features'], dtype=np.float32).T with open('../data/dataset/test_filenames.txt', 'r') as f: im_names = [n.rstrip() for n in f] keys = [get_key(im_name) for im_name in im_names] assert (len(im_names) == self.test_X.shape[0]) print 'Loading models...' d = joblib.load(self.model_filename) self.svr = d['svr'] self.scaler = d['scaler'] print 'Scaling...' self.test_X = self.scaler.transform(self.test_X) print 'Predicting...' preds = self.svr.predict(self.test_X) print 'Done!' pred_dict = {key: pred for (key, pred) in zip(keys, preds)} fn = '../data/dataset/test_preds_{:s}.pk'.format(self.dest_name) with open(fn, 'w') as f: pickle.dump(pred_dict, f) print 'Saved', fn def save_current_model(self): joblib.dump({ 'svr': self.svr, 'scaler': self.scaler }, self.model_filename) print self.model_filename, 'saved'
math.sqrt(mse_rf)  # RMSE of the random-forest model (notebook-style echo)

# Plot actual vs. predicted values (matplotlib).
plt.figure()
plt.plot(np.arange(98), pred1, label='predict')
plt.plot(np.arange(98), test_y, label='발생건수')
plt.legend()

# Try a support vector machine.
from sklearn.svm import LinearSVR

# Grid-search for good hyper-parameters.
svm_model = LinearSVR(random_state=486)
svm_model.get_params().keys()  # the SVM's tunable parameters
param_grid_svm = {
    'C': [1, 10, 100, 1000, 10000, 20000, 50000, 100000],
    'epsilon': [0.1, 0.5, 1, 1.25, 1.5, 1.75, 2]
}
grid_svm = GridSearchCV(svm_model, param_grid=param_grid_svm, cv=5)
grid_svm.fit(train_x, train_y)
print(grid_svm.best_params_)  # show the best parameters

# Refit with the best parameters found above.
svm_model = LinearSVR(C=50000, random_state=486, epsilon=1.75)
svm_model.fit(train_x, train_y)
pred_svm = svm_model.predict(test_x)

plt.figure()