def fix_time_estimate(events_per_developer, vectorizer, test_acc=False):
    """Train one LinearSVR fix-time regressor per developer.

    Parameters:
        events_per_developer: mapping developer -> list of (fix_time, text, bug_id) events.
        vectorizer: callable turning a list of event texts into feature vectors.
        test_acc: when True, print mean held-out R^2 and exit the process (debug mode).

    Returns:
        (clf_per_developer, bug_ft): per-developer fitted models, and a
        bug_id -> fix_time lookup for the bugs covered by those models.
    """
    clf_per_developer = {}
    bug_ft = {}
    acc_scores = []
    for developer, events in events_per_developer.items():
        bug_vectors = vectorizer([event[1] for event in events])
        bug_ids = [event[2] for event in events]
        fix_times = [event[0] for event in events]
        # Only developers with at least 10 events are modelled.
        if len(fix_times) > 9:
            clf = LinearSVR(C=1000)
            x_train, x_test, y_train, y_test = train_test_split(
                bug_vectors, fix_times, test_size=0.2, random_state=42)
            clf.fit(x_train, y_train)
            score = clf.score(x_test, y_test)
            # Refit on all events so the deployed model uses every sample.
            clf.fit(bug_vectors, fix_times)
            if score > 0.0:
                # Fix: record the held-out score computed *before* the refit.
                # Re-scoring x_test after fitting on all data (which includes
                # x_test) inflated the reported accuracy.
                acc_scores.append(score)
                clf_per_developer[developer] = clf
                for index, bug_id in enumerate(bug_ids):
                    bug_ft[bug_id] = fix_times[index]
    logger.info("%d out of %d developers covered by fix time estimation"
                % (len(acc_scores), len(events_per_developer)))
    if test_acc:
        print("mean developer fix time r^2 %.2f (+/- %.2f)"
              % (np.mean(acc_scores), np.std(acc_scores)))
        import sys
        sys.exit()  # debug-only early exit when measuring accuracy
    return clf_per_developer, bug_ft
def lsvm_regressor(x_trn: pd.DataFrame, y_trn: np.ndarray, x_val: pd.DataFrame, y_val: np.ndarray) -> tuple:
    """Fit a LinearSVR on defensive copies of the training data.

    Returns:
        (model, training_score, validation_score) — the fitted estimator and
        its R^2 scores on the training and validation sets.
    """
    # Copy inputs so the caller's frames/arrays are never mutated.
    train_X, val_X = x_trn.copy(), x_val.copy()
    train_y, val_y = y_trn.copy(), y_val.copy()

    model = LinearSVR(max_iter=400, C=0.05, random_state=7)
    model.fit(train_X, train_y)

    score_on_train = model.score(train_X, train_y)
    score_on_val = model.score(val_X, val_y)
    return model, score_on_train, score_on_val
def scikit_lsvr_test(size):
    """Fit a LinearSVR on a synthetic regression problem with `size` features.

    Returns:
        The R^2 score on a 25% held-out test split.
        (Fix: the original computed the score and discarded it, returning None.)
    """
    X, y = datasets.make_regression(n_samples=1000, n_features=size,
                                    random_state=0, noise=4.0, bias=100.0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42)
    model = LinearSVR(random_state=42)
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
def finall_model(data):
    """Standardise the data, fit four regressors, and plot a 2-year forecast.

    Assumes `data`'s last column is the target and its index contains the
    years 1994-2015 (the last two rows are the forecast years) — TODO confirm
    against the caller.
    Returns the standardised feature DataFrame.
    """
    # Data standardisation (translated from: 数据标准化)
    y = pd.DataFrame(data.iloc[:, -1])
    data = pd.DataFrame(data.iloc[:, :-1])
    # Fit the feature scaler on the 1994-2013 rows only, then transform everything.
    ss = StandardScaler().fit(data.loc[range(1994, 2014), :])
    data.loc[:, :] = ss.transform(data.loc[:, :])
    x_train = data.iloc[:-2, :]
    x_test = data.iloc[-2:, :]
    y_train = y.iloc[:-2, :]
    # NOTE(review): `ss` is rebound to a new scaler for the target; the later
    # inverse transform (via ss.var_/ss.mean_) therefore uses target statistics.
    ss = StandardScaler()
    y_train = ss.fit_transform(y_train)
    model1 = LinearRegression()
    model2 = SVR()
    model3 = LinearSVR()
    model4 = MLPRegressor(hidden_layer_sizes=(100, 2))
    model1.fit(x_train, y_train)
    model2.fit(x_train, y_train)
    model3.fit(x_train, y_train)
    model4.fit(x_train, y_train)
    # Training-set R^2 for each model (no held-out evaluation here).
    print(model1.score(x_train, y_train))
    print(model2.score(x_train, y_train))
    print(model3.score(x_train, y_train))
    print(model4.score(x_train, y_train))
    # Forecast the last two rows with the linear model and undo the target scaling.
    y_ = model1.predict(x_test)
    yy = np.sqrt(ss.var_) * y_ + ss.mean_
    plt.plot(y.loc[range(1994, 2014), "y"])
    plt.scatter([2014, 2015], yy, marker='*')
    plt.show()
    return data
class SVMWrapper:
    """Thin wrapper around sklearn's LinearSVR that also records training time."""

    def __init__(self, c=1.0, e=0.0, loss="epsilon_insensitive", dual=True, max_iter=1000):
        self.regressor = LinearSVR(C=c, epsilon=e, loss=loss, dual=dual, max_iter=max_iter)
        self.training_time = None  # seconds; set by train()

    def train(self, x_train, y_train):
        """Fit the underlying regressor and record the wall-clock fit time."""
        start = time.perf_counter()
        self.regressor.fit(x_train, y_train)
        self.training_time = time.perf_counter() - start

    def score(self, x_test, y_test):
        """Return the R^2 score of the fitted regressor on (x_test, y_test)."""
        return self.regressor.score(x_test, y_test)

    def predict(self, x_test):
        """Return predictions for a batch of samples."""
        return self.regressor.predict(x_test)

    def predict_one(self, x_single):
        """Return the prediction for a single sample (same call as predict)."""
        return self.regressor.predict(x_single)

    def get_training_time(self):
        """Return the last recorded training time in seconds.

        Raises:
            ValueError: if train() has not been called yet.
            (Fix: the original raised a bare ValueError with no message.)
        """
        if self.training_time is None:
            raise ValueError("model has not been trained yet; call train() first")
        return self.training_time
def train_SVM(self, data):
    """Train a LinearSVR, report validation MAE, pickle the model, and
    return predictions for self.x_test.

    Parameters:
        data: ((x_tr, y_tr), (x_val, y_val)) train/validation splits; targets
              are assumed to be log1p-transformed (expm1 is applied before MAE)
              — TODO confirm against the caller.

    Returns:
        Predictions of the fitted model on self.x_test.
    """
    train, validacion = data
    x_tr, y_tr = train
    x_val, y_val = validacion
    print('Start training LinearSVR...')
    start_time = self.timer()
    svr = LinearSVR()
    svr.fit(x_tr, y_tr)
    print("The R2 is: {}".format(svr.score(x_tr, y_tr)))
    self.timer(start_time)
    print("Making prediction on validation data")
    # Undo the log1p target transform before computing the error.
    y_val = np.expm1(y_val)
    y_val_pred = np.expm1(svr.predict(x_val))
    mae = mean_absolute_error(y_val, y_val_pred)
    print("El mean absolute error de es {}".format(mae))
    print('Saving model into a pickle')
    # Fix: the original wrapped os.mkdir in a bare `except: pass`, which
    # silently swallowed every error (e.g. PermissionError), not just
    # "directory already exists". exist_ok handles that case explicitly.
    os.makedirs('pickles', exist_ok=True)
    with open('pickles/svrCV.pkl', 'wb') as f:
        pickle.dump(svr, f)
    print('Making prediction and saving into a csv')
    y_test = svr.predict(self.x_test)
    return y_test
def lin_svm(x, y, x_test, y_test):
    """Fit a LinearSVR on (x, y) and return its R^2 score on (x_test, y_test)."""
    model = LinearSVR(random_state=0, tol=1e-5)
    model.fit(x, y)
    return model.score(x_test, y_test)
class LinearSVRPermuteCoef:
    """LinearSVR wrapper that records the min/max fitted coefficient on each fit.

    NOTE(review): relies on a module-level dict ``coeffs_state`` with 'min' and
    'max' list entries, defined elsewhere in this file — the recorded extremes
    are shared across all instances of this class.
    """

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded to LinearSVR unchanged.
        self.model = LinearSVR(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model, expose coef_/intercept_, and log coefficient extremes."""
        self.model.fit(X, y)
        self.coef_ = self.model.coef_
        self.intercept_ = self.model.intercept_

        def add_coef(arr, fn):
            # Append fn(coef_) (np.max or np.min) to the shared state list.
            arr.append(fn(self.coef_))

        add_coef(coeffs_state['max'], np.max)
        add_coef(coeffs_state['min'], np.min)
        return self

    def get_params(self, deep=True):
        """Delegate to the wrapped model (sklearn estimator API)."""
        return self.model.get_params(deep)

    def set_params(self, **kwargs):
        """Delegate to the wrapped model; return self for chaining."""
        self.model.set_params(**kwargs)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped model."""
        return self.model.predict(X)

    def score(self, X, y, sample_weight=None):
        """Delegate R^2 scoring, forwarding sample_weight only when given."""
        if sample_weight is not None:
            return self.model.score(X, y, sample_weight)
        else:
            return self.model.score(X, y)

    @staticmethod
    def permute_min_coefs():
        """Return the shared list of recorded minimum coefficients."""
        return coeffs_state['min']

    @staticmethod
    def permute_max_coefs():
        """Return the shared list of recorded maximum coefficients."""
        return coeffs_state['max']

    @staticmethod
    def reset_perm_coefs():
        """Clear the shared min/max coefficient history."""
        coeffs_state['min'] = []
        coeffs_state['max'] = []
def svm_regressor(train_data, train_label, test_data, test_label, parameters):
    """Fit a LinearSVR on price data, print its test MSE, and save a plot of
    actual vs. predicted prices to ./svm/<parameters>.png.

    NOTE: Python 2 code (print statements; `map` returning a list).
    `parameters` is a label used in the MSE printout, plot title and filename.
    """
    min_error = 10000000000
    error = []
    # Earlier hyper-parameter search experiments (GridSearchCV over SVR with
    # rbf/linear/poly kernels, and over LinearSVR C/epsilon) were left here
    # commented out; the chosen configuration is the LinearSVR below.
    # tuned_parameters = [{'kernel': ['rbf'], 'gamma': [100,10,1,1e-1, 1e-2,],
    #                      'C': [0.1,1, 10, 100], 'epsilon':[ 100, 1000, 10000,1e6,1e8]}]
    # clf = GridSearchCV(SVR(), tuned_parameters, cv=5,verbose=1,n_jobs=-1)
    # clf.fit(train_data, train_label)
    # tuned_parameters = [{'C': [1e-2,1e-1,1, 10, 100], 'epsilon': [1, 10, 100, 1000,10000]}]
    # clf = GridSearchCV(LinearSVR(random_state=random_state), tuned_parameters, cv=5, verbose=1, n_jobs=-1)
    # clf.fit(train_data, train_label)
    # regr = SVR(kernel='rbf', gamma=0.01,C=100)
    regr = LinearSVR(C=0.001, epsilon=1, random_state=random_state)
    regr.fit(train_data, train_label)
    score = regr.score(test_data, test_label)
    predict = regr.predict(test_data)
    # Wrap each scalar prediction in a list so the array is column-shaped.
    predict = map(lambda x: [x], predict)
    predict = np.array(predict)
    mse = MSE(np.array(predict), test_label)
    if (mse[0] < min_error):
        min_error = mse[0]
        print 'MSE ' + parameters + ' ' + str(mse[0])
    # Plot predicted series against the actual price history.
    df = pd.Series(predict.flatten(), index=test_label.index)
    price = train_label.append(test_label)
    plt.title('SVM Regression on ' + parameters)
    plt.plot(price[1000:-1], label='actual price')
    plt.plot(df, label='predicted price')
    plt.legend(loc='lower right')
    plt.xlabel('Dates')
    plt.ylabel('Price')
    # plt.show()
    directory = './svm/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    plt.savefig(directory + parameters + '.png')
    plt.close()
    return
def keplerLinear(self, kepler_df):
    """Fit a LinearSVR predicting 'koi_score' from the remaining columns.

    Parameters:
        kepler_df: DataFrame containing a 'koi_score' column (mutated in place:
                   the target column is deleted from it).

    Returns:
        (accuracy, elapsed_time): R^2 on a 25% held-out split, and the
        wall-clock fit time in seconds.
    """
    y = kepler_df['koi_score']
    X = kepler_df
    del X['koi_score']  # delete target from features; we don't need it there
    # Train and test on different data so the score is an honest estimate.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    regr = LinearSVR(random_state=0, tol=1e-5)
    start_time = timeit.default_timer()
    # Fix: fit on the training split only. The original fit on the full (X, y),
    # leaking the test rows into training and leaving X_train unused.
    regr.fit(X_train, y_train)
    end_time = timeit.default_timer()
    accuracy = regr.score(X_test, y_test)
    elapsed_time = end_time - start_time
    return accuracy, elapsed_time
def train_algs(self):
    """Train every user-selected algorithm on one random train/test split
    (no cross-validation), report each score via streamlit, and optionally
    fit a voting or stacking meta-model over the trained estimators.
    """
    st.subheader("Results")
    self.chosen_models_names = []
    self.chosen_models = []
    if len(self.algorithms) == 0:
        st.warning('You should select at least one algorithm')
        return
    X = self.raw_data.drop(self.out_col, axis=1)
    y = self.raw_data[self.out_col]
    # Random boolean mask: roughly percent_train% of rows go to training.
    msk = np.random.rand(len(X)) < self.percent_train / 100
    X_train = X[msk]
    X_test = X[~msk]
    Y_train = y[msk]
    Y_test = y[~msk]
    for alg in self.algorithms:
        if alg == 'LinearSVR':
            from sklearn.svm import LinearSVR
            svc = LinearSVR()
            svc.fit(X_train, Y_train)
            st.write("LinearSVR score", svc.score(X_test, Y_test))
            self.chosen_models_names.append('LinearSVR')
            self.chosen_models.append(svc)
        elif alg == 'RidgeCV':
            from sklearn.linear_model import RidgeCV
            rid = RidgeCV()
            rid.fit(X_train, Y_train)
            st.write("RidgeCV score", rid.score(X_test, Y_test))
            self.chosen_models_names.append('RidgeCV')
            self.chosen_models.append(rid)
        elif alg == 'Random Forest Regressor':
            from sklearn.ensemble import RandomForestRegressor
            rfc = RandomForestRegressor()
            rfc.fit(X_train, Y_train)
            st.write("rfc score", rfc.score(X_test, Y_test))
            self.chosen_models_names.append('Random Forest Regressor')
            self.chosen_models.append(rfc)
        elif alg == 'Adaboost':
            from sklearn.ensemble import AdaBoostRegressor
            ada = AdaBoostRegressor()
            ada.fit(X_train, Y_train)
            st.write("ada score", ada.score(X_test, Y_test))
            self.chosen_models_names.append('Adaboost')
            self.chosen_models.append(ada)
        elif alg == 'XGBoost':
            import xgboost as xgb
            # NOTE(review): rebinding `xgb` shadows the module name after the
            # first XGBoost iteration.
            xgb = xgb.XGBRegressor(n_estimators=300)
            xgb.fit(X_train, Y_train, verbose=0)
            st.write("xgb score", xgb.score(X_test, Y_test))
            self.chosen_models_names.append('XGBoost')
            self.chosen_models.append(xgb)
    if self.meta_model_check:
        if self.meta_model_type == "voting":
            from sklearn.ensemble import VotingRegressor
            stack = VotingRegressor(estimators=list(
                zip(self.chosen_models_names, self.chosen_models)))
            stack.fit(X_train, Y_train)
            st.write("voting score", stack.score(X_test, Y_test))
        else:
            from sklearn.ensemble import StackingRegressor
            if self.meta_model == "GradientBoostingRegressor":
                from sklearn.ensemble import GradientBoostingRegressor
                stack = StackingRegressor(
                    estimators=list(
                        zip(self.chosen_models_names, self.chosen_models)),
                    final_estimator=GradientBoostingRegressor())
            elif self.meta_model == "RandomForestRegressor":
                from sklearn.ensemble import RandomForestRegressor
                stack = StackingRegressor(
                    estimators=list(
                        zip(self.chosen_models_names, self.chosen_models)),
                    final_estimator=RandomForestRegressor())
            stack.fit(X_train, Y_train)
            st.write("stack score", stack.score(X_test, Y_test))
# Fit LinearSVR models on the surge and sway channels (plus ramped variants)
# and extract their coefficient vectors. surgeXC/swayYC and the nU/nV targets
# are defined earlier in the script — presumably motion-signal features;
# TODO confirm against the preceding code.
nR_ramp = ramp_signal(nR, 0.01)
from sklearn.svm import LinearSVR

# Surge channel: very large C (1e8) effectively disables regularisation.
linear_svm = LinearSVR(C=1e08, fit_intercept=True, dual=True, epsilon=1e-6,
                       loss='squared_epsilon_insensitive', max_iter=10000,
                       random_state=None, tol=0.000001, verbose=0).fit(surgeXC, nU)
coefLinear = linear_svm.coef_
# Ramped surge target: much smaller C (default tol).
linear_svm_ramp = LinearSVR(C=1, fit_intercept=True, dual=True, epsilon=1e-6,
                            loss='squared_epsilon_insensitive', max_iter=10000,
                            random_state=None, verbose=0).fit(surgeXC, nU_ramp)
coefLinear_ramp = linear_svm_ramp.coef_
print("Train set accuracy of Surge on LinearSVR method: {:.2f}".format(linear_svm.score(surgeXC,nU)))

# Sway channel: same setup as surge.
linear_svm1 = LinearSVR(C=1e08, fit_intercept=True, dual=True, epsilon=1e-6,
                        loss='squared_epsilon_insensitive', max_iter=10000,
                        random_state=None, tol=0.000001, verbose=0).fit(swayYC, nV)
coefLinear1 = linear_svm1.coef_
# Ramped sway target uses a looser epsilon (1e-4) and C=10.
linear_svm1_ramp = LinearSVR(C=1e01, fit_intercept=True, dual=True, epsilon=1e-4,
                             loss='squared_epsilon_insensitive', max_iter=10000,
                             random_state=None, tol=0.000001, verbose=0).fit(swayYC, nV_ramp)
coefLinear1_ramp = linear_svm1_ramp.coef_
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Kernel SVM regression (RBF).
# NOTE(review): each .score() result below is computed but neither stored nor
# printed — presumably this was run interactively (notebook/REPL style).
regressor = SVR(kernel='rbf', C=10, gamma=0.1)
regressor.fit(X_train, y_train)
regressor.score(X_test, y_test)

# Linear SVM regression.
regressor_linear = LinearSVR()
regressor_linear.fit(X_train, y_train)
regressor_linear.score(X_test, y_test)

# use a grid search to find the best parameters (see page 268)

##################
##### Question 2 b
##################
from sklearn.tree import DecisionTreeRegressor
DT = DecisionTreeRegressor(max_depth=4)
DT.fit(X_train, y_train)
DT.score(X_test, y_test)
# Python 2 script: PCA-reduce the YearPredictionMSD features, then fit and
# inspect a LinearSVR on the projected data.
data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
X = data[:, 1:91]  # feature columns
y = data[:, 0]     # target is the first column
pca = PCA(n_components=10)
pca.fit(X)
# NOTE(review): this bare constructor expression has no effect — it builds a
# new, unused PCA object (likely pasted from the repr of the fitted one).
PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
print pca.components_
# print pca.explained_variance_
# print pca.mean_
print pca.n_components_
# print pca.noise_variance_
print pca.components_[1]
# Project X onto the principal components: (10, d) . (d, n) -> transpose back.
rowFeatureVector = pca.components_
X = np.dot(rowFeatureVector, X.transpose())
X = X.transpose()
print len(X)
print X
clf = LinearSVR(C=1.0, epsilon=0, verbose=1, max_iter=1000)
clf.fit(X, y)
# Predictions and score are computed on the training data itself.
print clf.predict(X)
print y
print clf.score(X, y)
print clf.get_params(deep=True)
class AllRegressionModels:
    """
    Wrapper class around all supported regression models:
    LinearRegression, RandomForest, SVR, NuSVR, LinearSVR, and XGBRegressor.

    AllRegressionModels runs every available regression algorithm on the given
    dataset and outputs the coefficient of determination and execution time of
    each successful model when all_regression_models() is run.
    """

    def __init__(self, attributes=None, labels=None, test_size=0.25, verbose=False):
        """
        Initializes an AllRegressionModels object.

        Parameters:
            – attributes: numpy array of the independent variables (default None)
            – labels: numpy array of the dependent variables (default None)
            – test_size: proportion of the dataset used for testing; the
              training proportion is its complement (default 0.25)
            – verbose: whether to output logging during model training
              (default False)

        All other parameters for each model use their default values. For more
        granular control, instantiate each model individually.
        """
        self.attributes = attributes
        self.labels = labels
        self.test_size = test_size
        self.verbose = verbose
        self.linear_regression = LinearRegression()
        self.random_forest = RandomForestRegressor(verbose=self.verbose)
        self.SVR = SVR(verbose=self.verbose)
        self.nu_SVR = NuSVR(verbose=self.verbose)
        self.linear_SVR = LinearSVR(verbose=self.verbose)
        self.XGB_regressor = XGBRegressor(verbosity=int(self.verbose))
        # Maps model name -> [R2 score, execution time]; the sentinel entry
        # below doubles as the header row printed by _print_results().
        self._regression_models = {"Model": ["R2 Score", "Time"]}
        self._failures = []

    # Accessor methods

    def get_attributes(self):
        """Return the independent-variable array (None until set)."""
        return self.attributes

    def get_labels(self):
        """Return the dependent-variable array (None until set)."""
        return self.labels

    def get_test_size(self):
        """Return the test-split proportion (a number or None)."""
        return self.test_size

    def get_verbose(self):
        """Return the verbose flag (defaults to False)."""
        return self.verbose

    def get_all_regression_models(self):
        """Return a list of all model instances (created in __init__)."""
        return [self.linear_regression, self.random_forest, self.SVR,
                self.nu_SVR, self.linear_SVR, self.XGB_regressor]

    def get_linear_regression(self):
        """Return the LinearRegression model instance."""
        return self.linear_regression

    def get_random_forest(self):
        """Return the RandomForestRegressor model instance."""
        return self.random_forest

    def get_SVR(self):
        """Return the SVR model instance."""
        return self.SVR

    def get_nu_SVR(self):
        """Return the NuSVR model instance."""
        return self.nu_SVR

    def get_linear_SVR(self):
        """Return the LinearSVR model instance."""
        return self.linear_SVR

    def get_XGB_regressor(self):
        """Return the XGBRegressor model instance."""
        return self.XGB_regressor

    # Modifier methods

    def set_attributes(self, new_attributes=None):
        """Set attributes; expects a numpy array of independent variables."""
        self.attributes = new_attributes

    def set_labels(self, new_labels=None):
        """Set labels; expects a numpy array of dependent variables."""
        self.labels = new_labels

    def set_test_size(self, new_test_size=0.25):
        """Set test_size; expects a number or None (default 0.25)."""
        self.test_size = new_test_size

    def set_verbose(self, new_verbose=False):
        """Set verbose; expects a truthy/falsy value (default False)."""
        self.verbose = new_verbose

    # Regression functionality

    def all_regression_models(self):
        """
        Driver method for running all regression models with the given
        attributes and labels.

        Trains the models and records their coefficients of determination and
        execution times via _all_regression_models_runner(), then prints each
        successful model's measurements and lists any failed models. If verbose
        is False, all logging to stdout/stderr during training is suppressed.
        """
        if not self.verbose:
            suppress_output = io.StringIO()
            with redirect_stderr(suppress_output), redirect_stdout(suppress_output):
                self._all_regression_models_runner()
        else:
            self._all_regression_models_runner()

        self._print_results()

    # Helper methods

    def _all_regression_models_runner(self):
        """
        Fit and score every model on a fresh train/test split, recording
        [R2 score, execution time] for successes and the model name for
        failures. May only be called by all_regression_models().
        """
        dataset_X_train, dataset_X_test, dataset_y_train, dataset_y_test =\
            train_test_split(self.attributes, self.labels, test_size=self.test_size)

        # Fix/cleanup: one loop replaces six copy-pasted try blocks, and
        # `except Exception` replaces bare `except:`, which also swallowed
        # SystemExit and KeyboardInterrupt.
        candidates = [
            ("LinearRegression", self.linear_regression),
            ("RandomForest", self.random_forest),
            ("SVR", self.SVR),
            ("NuSVR", self.nu_SVR),
            ("LinearSVR", self.linear_SVR),
            ("XGBRegressor", self.XGB_regressor),
        ]
        for name, model in candidates:
            try:
                start_time = time.time()
                model.fit(dataset_X_train, dataset_y_train)
                end_time = time.time()
                self._regression_models[name] = [
                    model.score(dataset_X_test, dataset_y_test),
                    end_time - start_time,
                ]
            except Exception:
                self._failures.append(name)

    def _print_results(self):
        """
        Print the results of _all_regression_models_runner() in tabular form,
        followed by any models that failed to run. May only be called by
        all_regression_models().
        """
        print("\nResults:\n")
        for model, data in self._regression_models.items():
            print("{:<20} {:<20} {:<20}".format(model, data[0], data[1]))
        print()

        if len(self._failures) > 0:
            print("The following models failed to run:\n")
            for entry in self._failures:
                print(entry)
            print()
                                predictiveAttributeNotDegree[i][18]])
        test_result_tot.append([predictiveAttributeDegree[i][2]])

# Manual 80/20 split: the first 80% of rows train, the rest test.
train_percent = (len(predictiveAttributeNotDegree)/100)*80
count = 0
for i in range(len(predictiveAttributeNotDegree)):
    if count < train_percent:
        count = count + 1
        train_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1],
                              predictiveAttributeNotDegree[i][6], predictiveAttributeNotDegree[i][7],
                              predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10],
                              predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],
                              predictiveAttributeNotDegree[i][17], predictiveAttributeNotDegree[i][18]])
        train_result_tot.append([predictiveAttributeNotDegree[i][2]])
    else:
        test_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1],
                             predictiveAttributeNotDegree[i][6], predictiveAttributeNotDegree[i][7],
                             predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10],
                             predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],
                             predictiveAttributeNotDegree[i][17], predictiveAttributeNotDegree[i][18]])
        test_result_tot.append([predictiveAttributeNotDegree[i][2]])

train_result_tot = np.array(train_result_tot)
svm_reg_tot.fit(train_set_tot, train_result_tot.ravel())
print("----ALL ATTRIBUTE: score: ", svm_reg_tot.score(test_set_tot, test_result_tot))
# Column meanings (per original note):
# 0. matr 1. cf 6. tipoCds 7. coorte 9. annodiploma 10. votodip 11. codschool 12. tipoMat 17. mot_sta 18. sta
newStudent = [[2933, 2928, 1, 2015, 2015, 100, 200, 9, 3, 10]]
real_value = [30]
predicted = svm_reg_tot.predict(newStudent)
print("----ALL ATTRIBUTE: Predicted: ", predicted)
print("----ALL ATTRIBUTE: MSE: ", mean_squared_error(real_value, svm_reg_tot.predict(newStudent)))
print("----ALL ATTRIBUTE: Params: ", svm_reg_tot.get_params())
svr_rbf = SVR(kernel='rbf') # 核函数 rbf 高斯 svr_poly = SVR(kernel='poly', degree=2, C=1e3) # 核函数 poly 多项式,degree=3 多项式次数为3 svr_line = SVR(kernel='linear', C=1e3) # 和函数 linear ,C惩罚系数 默认是1.0 svr_L = LinearSVR(C=1e3) svr_rbf.fit(X, Y) svr_poly.fit(X, Y) svr_line.fit(X, Y) svr_L.fit(X, Y) result_rbf = svr_rbf.predict(X) result_poly = svr_poly.predict(X) result_line = svr_line.predict(X) result_L = svr_L.predict(X) plt.plot(np.arange(len(result_rbf)), Y, 'b.') plt.plot(np.arange(len(result_rbf)), result_rbf, 'k-', label='rbf') plt.plot(np.arange(len(result_rbf)), result_poly, 'r-', label='poly') plt.plot(np.arange(len(result_rbf)), result_line, 'y-', label='linear') plt.plot(np.arange(len(result_rbf)), result_L, 'go-', label='LinearSVR') plt.legend() plt.show() print('rbf_score:', svr_rbf.score(X, Y)) print('poly_score:', svr_poly.score(X, Y)) print('linear_score:', svr_line.score(X, Y)) print('LinearSVR:', svr_L.score(X, Y)) # 总结 # 一般推荐在做训练之前对数据进行归一化,当然测试集中的数据也需要归一化。。 # 在特征数非常多的情况下,或者样本数远小于特征数的时候,使用线性核,效果已经很好,并且只需要选择惩罚系数C即可。 # 在选择核函数时,如果线性拟合不好,一般推荐使用默认的高斯核'rbf'。这时我们主要需要对惩罚系数C和核函数参数γγ进行艰苦的调参,通过多轮的交叉验证选择合适的惩罚系数C和核函数参数γγ。 # 理论上高斯核不会比线性核差,但是这个理论却建立在要花费更多的时间来调参上。所以实际上能用线性核解决问题我们尽量使用线性核。
                               # (tail of a regressor constructor started in the previous chunk)
                               verbose=0, random_state=None, max_iter=1000)
# fit the model
regr.fit(X_train, y_train)
# get the prediction
prediction_svm_p = regr.predict(X_test)
# Revert the min-max scaling of the target back to the original units.
prediction_svm_p_ori = prediction_svm_p * (y.max() - y.min()) + y.min()
y_test_ori = np.array(y_test * (y.max() - y.min()) + y.min())
# get the score for this model
score = regr.score(X_test, y_test)
# MSE of the prediction in original units.
mse_svm_p = np.mean((prediction_svm_p_ori - y_test_ori)**2)
print("MSE with penalized SVM:", mse_svm_p)
# Plot actual vs. predicted values.
plt.plot(y_test_ori, label='y_test_ori')
plt.plot(prediction_svm_p_ori, label='prediction_ori')
plt.title('Comparison between y_test and prediction with SVM (L2 penalty)')
plt.ylabel('CRIM')
plt.legend()
plt.show()

########## Apply in MLP
# start to build MLP
model = Sequential()
# Standardise features and targets, then compare SVR(linear) and LinearSVR
# using MSE/MAE computed in the original (inverse-transformed) units.
ss = StandardScaler()
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)
# NOTE(review): the same scaler instance is re-fit on the targets below, which
# discards the feature statistics. This works only because x_train/x_test are
# already transformed; a separate scaler for y would be clearer and safer.
y_train = ss.fit_transform(y_train)
y_test = ss.transform(y_test)
svr = SVR(kernel="linear")
svr.fit(x_train, y_train)
svr.score(x_test, y_test)
y_predict = svr.predict(x_test)
print(
    mean_squared_error(ss.inverse_transform(y_test),
                       ss.inverse_transform(y_predict)))
print(
    mean_absolute_error(ss.inverse_transform(y_test),
                        ss.inverse_transform(y_predict)))
lsvr = LinearSVR()
lsvr.fit(x_train, y_train)
lsvr.score(x_test, y_test)
y_predict1 = lsvr.predict(x_test)
print(
    mean_squared_error(ss.inverse_transform(y_test),
                       ss.inverse_transform(y_predict1)))
print(
    mean_absolute_error(ss.inverse_transform(y_test),
                        ss.inverse_transform(y_predict1)))
X2 = X_train_reduced[test] Y2 = Y_train_raw[test] ## Train Classifiers on fold rdg_clf = Ridge(alpha=0.5) rdg_clf.fit(X1, Y1) lso_clf = Lasso(alpha=0.6257) lso_clf.fit(X1, Y1) svr_clf = LinearSVR(C=1e3) svr_clf.fit(X1, Y1) ## Score Classifiers on fold rdg_clf_score = rdg_clf.score(X2, Y2) lso_clf_score = lso_clf.score(X2, Y2) svr_clf_score = svr_clf.score(X2, Y2) print "Ridge: ", rdg_clf_score print "Lasso: ", lso_clf_score print "SVR_RBF: ", svr_clf_score ## Train final Classifiers # clf = Ridge(alpha=.5) clf = LinearSVR(C=1e3, gamma=0.1) clf.fit(X_train_reduced, Y_train_raw) Y_predicted = clf.predict(X_test_reduced) ## Save results to csv np.savetxt("prediction.csv", Y_predicted, fmt="%.5f", delimiter=",")
# Fit a series of regressors on the same split and collect each test-set R^2
# in score_list (defined earlier in the script).
from sklearn.neighbors import KNeighborsRegressor
knreg = KNeighborsRegressor(n_neighbors=5)
knreg.fit(X_train, y_train)
score_list.append(knreg.score(X_test, y_test))

## Support Vector Regressor
from sklearn.svm import SVR
svm_reg = SVR(kernel='poly', gamma='auto', degree=2, C=5, epsilon=0.1)
svm_reg.fit(X_train, y_train)
score_list.append(svm_reg.score(X_test, y_test))

## linearSVR
from sklearn.svm import LinearSVR
sv_reg = LinearSVR(max_iter=1000)
sv_reg.fit(X_train, y_train)
score_list.append(sv_reg.score(X_test, y_test))

## random forest
from sklearn.ensemble import RandomForestRegressor
rf_reg = RandomForestRegressor(max_depth=5)
rf_reg.fit(X_train, y_train)
score_list.append(rf_reg.score(X_test, y_test))

# The LightGBM/XGBoost variants below are disabled via triple-quoted blocks.
'''
## LightGBM
import lightgbm as lgb
lgb_reg=lgb.LGBMRegressor(objective='regression')
lgb_reg.fit(X_train, y_train)
score_list.append(lgb_reg.score(X_test, y_test))
'''
'''
### XGBoost
# Persist the prepared frame, merge in the annual counts, then compare several
# regressors on a 70/30 split.
df = df.iloc[:2949, :]
import pickle
df.to_pickle("Final_Data")
# Fix: DataFrame has no `read_pickle` method — `df.read_pickle(...)` raised
# AttributeError and discarded the result. Use the pandas module-level loader
# and keep the reloaded frame (assumes pandas is imported as `pd` at file top).
df = pd.read_pickle("Final_Data")
for idx, row in output_df.iterrows():
    df.loc[row['FIPS'], 'annual_count_avg'] = row['Average Annual Count']
X = df.loc[:, :'WATR']
y = df['annual_count_avg']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
from sklearn.svm import LinearSVR
svr = LinearSVR(random_state=0, tol=1e-5).fit(X_train, y_train)
svr.score(X_test, y_test)
from sklearn import svm
# NOTE(review): rebinding `svm` shadows the sklearn.svm module just imported.
svm = svm.SVR().fit(X_train, y_train)
svm.score(X_test, y_test)
from sklearn.svm import NuSVR
nuSVR = NuSVR().fit(X_train, y_train)
nuSVR.score(X_test, y_test)
from sklearn import linear_model
ridge = linear_model.Ridge(alpha=0.5).fit(X_train, y_train)
ridge.score(X_test, y_test)
# Index of the largest ridge coefficient (most influential feature).
np.argmax(ridge.coef_)
if __name__ == '__main__':
    np.random.seed(1)
    m = 100  # number of data samples (translated from Korean comment)
    X = 2 * np.random.rand(m, 1)  # random values in [0, 2), shape (100, 1)
    y = (4 + 3 * X + np.random.randn(m, 1)).ravel()  # flattened to 1-D, shape (100,)
    # plot_data(X, y)
    # plt.show()
    # Fit a LinearSVR (default epsilon=0) and report RMSE and R^2 on the
    # training data itself.
    reg1 = LinearSVR(random_state=1)
    reg1.fit(X, y)
    print(reg1.intercept_, reg1.coef_)
    y_pred = reg1.predict(X)
    reg1_mse = mean_squared_error(y_true=y, y_pred=y_pred)
    reg1_rmse = np.sqrt(reg1_mse)
    reg1_r2 = reg1.score(X, y)  # R2 score
    print(reg1_rmse, reg1_r2)
    # Plot the data and the regression lines for several epsilon values,
    # plus a kernel SVR with a linear kernel for comparison.
    axes = [0, 2]
    plot_data(X, y)
    plot_svm_regression(reg1, axes, label='LinearSVR(e=0)')
    for e in [0.5, 1.0, 1.5]:
        reg2 = LinearSVR(random_state=1, epsilon=e)
        reg2.fit(X, y)
        plot_svm_regression(reg2, axes, label=f'LinearSVR(e={e})')
    reg3 = SVR(kernel='linear')
    reg3.fit(X, y)
    plot_svm_regression(reg3, axes, label='SVR(e=0.1)')
        # (continuation of an if/else split started in the previous chunk:
        # rows go to the train or test lists based on that condition)
        train_set.append([
            predictiveAttributeNotDegree[i][11],
            predictiveAttributeNotDegree[i][13]
        ])
        train_result.append([predictiveAttributeNotDegree[i][2]])
    else:
        test_set.append([
            predictiveAttributeNotDegree[i][11],
            predictiveAttributeNotDegree[i][13]
        ])
        test_result.append([predictiveAttributeNotDegree[i][2]])

# Fit a LinearSVR on the two selected attribute columns and report test score,
# per-sample predictions, and MSE.
svm_reg = LinearSVR(epsilon=1.0, max_iter=10000000)
train_result = np.array(train_result)
svm_reg.fit(train_set, train_result.ravel())
print(svm_reg.score(test_set, test_result))
prediction = []
for item in test_set:
    items = [[item[0], item[1]]]  # predict expects a 2-D (1, 2) sample
    prediction.append(svm_reg.predict(items))
pred = np.zeros(len(prediction))
predi = np.array(prediction)
for i in range(len(prediction)):
    pred[i] = predi[i][0]
print(("MSE: {}".format(mean_squared_error(pred, test_result))))
print("Params: ", svm_reg.get_params())
test_set = np.array(test_set)
test_result = np.array(test_result)
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
class AutoSklearnRegression():
    """Train and compare LinearRegression / LinearSVR /
    GradientBoostingRegressor on one dataset and emit an HTML report.

    The public interface (constructor arguments and the train / predict /
    score / summary / print_data / best_model methods) is unchanged from
    the original implementation.
    """

    def __init__(self, appID, models=None, evaluation_parameters=None,
                 dimensionality_reduction=None):
        # `is None` instead of `== None` (PEP 8); defaults enable
        # every model / metric / the ExtraTrees reducer.
        if models is None:
            models = [
                'LinearRegression', 'LinearSVR', 'GradientBoostingRegressor'
            ]
        if evaluation_parameters is None:
            evaluation_parameters = [
                'ExplainedVariance', 'MAE', 'MSE', 'R2Score'
            ]
        if dimensionality_reduction is None:
            dimensionality_reduction = ['ExtraTreesClassifier']
        self.models = models
        self.dimensionality_reduction = dimensionality_reduction
        self.evaluation_parameters = evaluation_parameters
        # Boolean masks: [LinearRegression, LinearSVR, GradientBoosting]
        # and [ExplainedVariance, MAE, MSE, R2Score].
        self.models_to_use = [True, True, True]
        self.params_to_use = [True, True, True, True]
        self.set_training_parameters()
        # Imports kept method-local (as in the original) so the class can
        # be declared without sklearn installed.
        from sklearn.linear_model import LinearRegression
        self.lr_estimator = LinearRegression()
        from sklearn.svm import LinearSVR
        self.svr_estimator = LinearSVR()
        from sklearn.ensemble import GradientBoostingRegressor
        self.gbr_estimator = GradientBoostingRegressor()
        self.appID = str(appID) + '.html'

    def set_training_parameters(self):
        """Translate requested model/metric names into the boolean masks."""
        if 'LinearRegression' not in self.models:
            self.models_to_use[0] = False
        if 'LinearSVR' not in self.models:
            self.models_to_use[1] = False
        if 'GradientBoostingRegressor' not in self.models:
            self.models_to_use[2] = False
        if 'ExplainedVariance' not in self.evaluation_parameters:
            self.params_to_use[0] = False
        if 'MAE' not in self.evaluation_parameters:
            self.params_to_use[1] = False
        if 'MSE' not in self.evaluation_parameters:
            self.params_to_use[2] = False
        if 'R2Score' not in self.evaluation_parameters:
            self.params_to_use[3] = False

    def train(self, data, test_size, response_col_name):
        """Clean `data`, fit all selected models, and write the HTML report.

        Returns the report as a string (also written to ``self.appID``).
        """
        self.test_size = test_size
        self.data = data
        self.original_data = data
        self.response = response_col_name
        self.data = self.data.dropna(axis=1, how='all')
        # Imputing nan: mode for low-cardinality columns, mean otherwise.
        import numpy as np
        self.data = self.data.replace([np.inf, -np.inf], np.nan)
        self.missing_value_columns = self.data.columns[
            self.data.isnull().any()]
        for col in self.missing_value_columns:
            if len(self.data[col].value_counts()) < 5:
                self.data[col].fillna(self.data[col].mode()[0], inplace=True)
            else:
                self.data[col].fillna(self.data[col].mean(), inplace=True)
        # Encoding data
        from automl.sklearn.preprocessing.DummyEncode import DummyEncode
        self.data = DummyEncode(self.data).encode()
        # Separating target and response data.
        # (keyword `axis=1` — positional axis was removed in pandas 2.0)
        self.Y = self.data[self.response]
        self.X = self.data.drop(self.response, axis=1)
        # Dimensionality reduction
        self.X = self.select_dimensions()
        # Splitting into train and test data set
        from automl.sklearn.preprocessing.Split import Split
        self.x_train, self.x_test, self.y_train, self.y_test = Split(
        ).train_test_split(self.X, self.Y, test_size=test_size,
                           random_state=0)
        # Training all selected models with train data
        self.headers = ['Evaluation Parameters']
        generated_models = []
        if self.models_to_use[0]:
            self.lr_model = self.lr_estimator.fit(self.x_train, self.y_train)
            self.headers.append('LinearRegression')
            generated_models.append(self.lr_model)
        if self.models_to_use[1]:
            self.svr_model = self.svr_estimator.fit(self.x_train,
                                                    self.y_train)
            self.headers.append('LinearSVR')
            generated_models.append(self.svr_model)
        if self.models_to_use[2]:
            self.gbr_model = self.gbr_estimator.fit(self.x_train,
                                                    self.y_train)
            self.headers.append('GradientBoostingRegressor')
            generated_models.append(self.gbr_model)
        # Predicting on test data with all selected models
        self.predict_all()
        # Build the report sections.
        print_data_str = self.print_data()
        print_summary_str = self.summary()
        self.best_model()
        # Write the report to <appID>.html and return its contents.
        with open(self.appID, 'w') as f:
            print(print_data_str, print_summary_str, self.best_model_str,
                  file=f)
        with open(self.appID, 'r') as myfile:
            str_output = myfile.read()
        return str_output

    def select_dimensions(self):
        """Run the configured feature-selection wrapper and return the
        reduced feature matrix."""
        if self.dimensionality_reduction[0] == 'LinearSVC':
            from automl.sklearn.preprocessing.LinearSVC import LinearSVC
            self.reducer = LinearSVC(self.X, self.Y)
            self.method_used = 'LinearSVC'
        if self.dimensionality_reduction[0] == 'ExtraTreesClassifier':
            from automl.sklearn.preprocessing.ExtraTreesClassifier import ExtraTreesClassifier
            self.reducer = ExtraTreesClassifier(self.X, self.Y)
            self.method_used = 'ExtraTreesClassifier'
        if self.dimensionality_reduction[0] == 'LogisticRegression':
            from automl.sklearn.preprocessing.LogisticRegression import LogisticRegression
            self.reducer = LogisticRegression(self.X, self.Y)
            self.method_used = 'LogisticRegression'
        if self.dimensionality_reduction[0] == 'LassoRegression':
            from automl.sklearn.preprocessing.LassoRegression import LassoRegression
            self.reducer = LassoRegression(self.X, self.Y)
            self.method_used = 'LassoRegression'
        return self.reducer.selectFeatures()

    def predict_all(self):
        """Predict on the held-out split with every enabled model."""
        self.y_pred_all = []
        if self.models_to_use[0]:
            self.y_predict__lr = self.lr_estimator.predict(self.x_test)
            self.y_pred_all.append(self.y_predict__lr)
        if self.models_to_use[1]:
            self.y_predict__svr = self.svr_estimator.predict(self.x_test)
            self.y_pred_all.append(self.y_predict__svr)
        if self.models_to_use[2]:
            self.y_predict__gbr = self.gbr_estimator.predict(self.x_test)
            self.y_pred_all.append(self.y_predict__gbr)

    def predict(self, data):
        """Impute/encode `data` and predict with the best-fit model."""
        # Imputing nan.  NOTE(review): the cardinality threshold here is
        # 10 while train() uses 5 — confirm whether that is intentional.
        missing_value_columns = data.columns[data.isnull().any()]
        for col in missing_value_columns:
            if len(data[col].value_counts()) < 10:
                data[col].fillna(data[col].mode()[0], inplace=True)
            else:
                data[col].fillna(data[col].mean(), inplace=True)
        # Encoding data
        from automl.sklearn.preprocessing.DummyEncode import DummyEncode
        data = DummyEncode(data).encode()
        if self.response in data.columns:
            data_y = data[self.response]  # currently unused
            data_x = data.drop(self.response, axis=1)
        else:
            data_y = None
            data_x = data
        # BUG FIX: the original stored the LR result in `self.predcition`
        # (typo) while the SVR/GBR branches stored `self.prediction`, and
        # then returned `self.predcition` — so SVR/GBR predictions were
        # never returned.  All branches now agree on `self.prediction`.
        if self.best_fit_model == self.lr_estimator:
            self.prediction = self.lr_estimator.predict(data_x)
        elif self.best_fit_model == self.svr_estimator:
            self.prediction = self.svr_estimator.predict(data_x)
        else:
            self.prediction = self.gbr_estimator.predict(data_x)
        return self.prediction

    def score(self, x_train, y_train):
        """Return (lr, svr, gbr) scores on the given data."""
        return self.lr_estimator.score(x_train, y_train), \
            self.svr_estimator.score(x_train, y_train), \
            self.gbr_estimator.score(x_train, y_train)

    def summary(self):
        """Build the HTML metric table for every enabled model/metric."""
        from tabulate import tabulate
        from sklearn.metrics import explained_variance_score
        from sklearn.metrics import mean_absolute_error
        from sklearn.metrics import mean_squared_error
        from sklearn.metrics import r2_score
        evaluation_table = []
        if self.params_to_use[0]:
            evaluation_table.append(['Explained Variance Score'])
        if self.params_to_use[1]:
            evaluation_table.append(['Mean Absolute Error'])
        if self.params_to_use[2]:
            evaluation_table.append(['Mean Squared Error'])
        if self.params_to_use[3]:
            evaluation_table.append(['R2 Score'])
        # One column per model; rows were pre-seeded with metric labels.
        for y_pred in self.y_pred_all:
            i = 0
            if self.params_to_use[0]:
                evaluation_table[i].append(
                    explained_variance_score(self.y_test, y_pred))
                i = i + 1
            if self.params_to_use[1]:
                evaluation_table[i].append(
                    mean_absolute_error(self.y_test, y_pred))
                i = i + 1
            if self.params_to_use[2]:
                evaluation_table[i].append(
                    mean_squared_error(self.y_test, y_pred))
                i = i + 1
            if self.params_to_use[3]:
                evaluation_table[i].append(r2_score(self.y_test, y_pred))
        summary_str = '<p><b>Accuracy Metric:</b></p><div style="overflow-x:auto;">'\
            + tabulate(evaluation_table, headers=self.headers,
                       tablefmt="html") + '</div>'
        return summary_str

    def print_data(self):
        """Render dataset / split overview tables as styled HTML."""
        tb_data = self.original_data.head(n=5)
        tb_train = self.x_train.head(n=5)
        tb_test = self.x_test.head(n=5)
        tb_columns1 = list(self.data.columns)
        tb_columns2 = list(self.x_train.columns)
        style_html = '<style>table {border-collapse: collapse;width: 80%;}th, td {padding: 8px;text-align: left;border: 1px solid #ddd; font-size: 12px;}tr:hover {background-color:#f5f5f5;}th {background-color: #ec1a3d;color: white;}</style>'
        from tabulate import tabulate
        info_tables = '<div><p><b>Data Dimensions: </b>' + str(self.data.shape[0]) \
            + ' Rows and ' + str(self.data.shape[1]) + ' Features</p><p><b>Prediction Variable: </b>' \
            + self.response + '</p>' + '<p><b>Available Features: </b>' + str(tb_columns1) + '</p>' \
            + '<p><b>Columns where missing values were found and replaced: </b>' \
            + str(self.missing_value_columns) + '</p>' + '<p><b>Method Used for Dimensionality Reduction:</b> ' + self.method_used + '</p><p><b>Selected Best Features: </b>' \
            + str(tb_columns2) + '</p>' + '<p><b>Complete Dataset: </b>' + str(self.data.shape[0]) \
            + ' Rows and ' + str(self.data.shape[1]) + ' Features</p>' + '<p><b>Target Feature: </b>' \
            + self.response + '</p>' + '<div style="overflow-x:auto;">' \
            + tabulate(tb_data, headers=tb_columns1, tablefmt="html") + '</div>' \
            + '<p><b>Splitting:</b> [############] 100%</p>' + '<p><b>Training Dataset: </b>' \
            + str(self.x_train.shape[0]) + ' Rows and ' + str(self.x_train.shape[1]) + ' Features</p>' \
            + '<div style="overflow-x:auto;">' + tabulate(tb_train, headers=tb_columns2, tablefmt="html") \
            + '</div>' + '<p><b>Testing Dataset: </b>' + str(self.x_test.shape[0]) + ' Rows and ' \
            + str(self.x_test.shape[1]) + ' Features</p>' + '<div style="overflow-x:auto;">' \
            + tabulate(tb_test, headers=tb_columns2, tablefmt="html") + '</div>' \
            + '<p><b>Training Models:</b> [#############################] 100%</p>'
        return style_html + info_tables

    def best_model(self):
        """Pick the model with the best test-set R2 and record the
        report footer string."""
        acc_lr, acc_svr, acc_gbr = -10000, -10000, -10000
        from sklearn.metrics import r2_score
        if self.models_to_use[0]:
            acc_lr = r2_score(self.y_test, self.y_predict__lr)
        if self.models_to_use[1]:
            acc_svr = r2_score(self.y_test, self.y_predict__svr)
        if self.models_to_use[2]:
            acc_gbr = r2_score(self.y_test, self.y_predict__gbr)
        best_acc = max(acc_lr, acc_svr, acc_gbr)
        if best_acc == acc_lr:
            model_name = 'Linear Regression'
            self.best_fit_model = self.lr_estimator
        elif best_acc == acc_svr:
            model_name = 'Support Vector Regression'
            self.best_fit_model = self.svr_estimator
        else:
            model_name = 'Gradient Boosting Regression'
            self.best_fit_model = self.gbr_estimator
        self.best_model_str = '<p><b>Evaluating Best Model:</b> [##########] Done</p><p><b>Best Model:</b> ' \
            + model_name + '</p><p><b>R2_Score:</b> ' + str(best_acc) + '</p></div>'
        return self.best_fit_model
def load_data():
    """Return the Boston housing features and targets in a random order.

    NOTE(review): `datasets.load_boston` was removed from scikit-learn
    1.2 — confirm the pinned sklearn version supports it.
    """
    boston = datasets.load_boston()
    # Shuffle features and targets with one shared permutation.
    perm = np.random.permutation(len(boston.data))
    return boston.data[perm], boston.target[perm]


if __name__ == '__main__':
    X, y = load_data()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state)

    # Standardise features with statistics from the training split only.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_std = scaler.transform(X_train)
    X_test_std = scaler.transform(X_test)

    # Linear SVR with default regularisation strength.
    model = LinearSVR(C=1.0)
    model.fit(X_train_std, y_train)
    print(model.score(X_test_std, y_test))  # ~0.8164411717195368
from sklearn.svm import LinearSVR, LinearSVC
# FIX: `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

# Because of the earlier exercises, normalization and result analysis
# are skipped here; the goal is to compare SVR against LinearRegression
# on the same dataset.
data = pd.read_csv("./Folds5x2_pp.csv", header=0, encoding="gbk")
X = data[['AT', 'V', 'AP', 'RH']]
y = data[['PE']]
# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=10)

# NOTE(review): y_train is a single-column DataFrame; sklearn warns and
# ravels it internally — behavior preserved as in the original.
svr_Linear = LinearSVR(random_state=0)
svr_Linear.fit(X_train, y_train)
print("SVR_score:", svr_Linear.score(X_train, y_train))

liner = LinearRegression()
liner.fit(X_train, y_train)
print("Linearmodel_score:", liner.score(X_train, y_train))
# In this example LinearRegression fits better.

# Next: compare SVC with LogisticRegression on the UCI wine dataset.
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
wine_dataset = pd.read_csv(URL, header=None)
wine_dataset.columns = [
    'class label', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9',
    'F10', 'F11', 'F12', 'F13'
]
X, y = wine_dataset.iloc[:, 1:].values, wine_dataset.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
# NOTE(review): fragment — the LinearSVR(...) constructor these keyword
# arguments close off begins above the visible chunk, and the final
# `linear_svm1_ramp = LinearSVR(...)` call is cut off below it.
                        random_state=None, tol=0.000001,
                        verbose=0).fit(surgeXC, nU)
coefLinear = linear_svm.coef_

# Same surge model refit on the ramped target nU_ramp.
linear_svm_ramp = LinearSVR(C=1, fit_intercept=True, dual=True,
                            epsilon=1e-6,
                            loss='squared_epsilon_insensitive',
                            max_iter=10000, random_state=None,
                            verbose=0).fit(surgeXC, nU_ramp)
coefLinear_ramp = linear_svm_ramp.coef_

# NOTE(review): score() on a regressor returns R^2, not accuracy — the
# printed label is kept verbatim (runtime string).
print("Train set accuracy of Surge on LinearSVR method: {:.2f}".format(
    linear_svm.score(surgeXC, nU)))

# Sway-direction model; very large C approximates a hard-margin fit.
linear_svm1 = LinearSVR(C=1e08, fit_intercept=True, dual=True,
                        epsilon=1e-6,
                        loss='squared_epsilon_insensitive',
                        max_iter=10000, random_state=None, tol=0.000001,
                        verbose=0).fit(swayYC, nV)
coefLinear1 = linear_svm1.coef_

linear_svm1_ramp = LinearSVR(C=1e08, fit_intercept=True, dual=True,
# NOTE(review): X_test and y_test are dropna()'d independently, which can
# leave them misaligned if `features` and `target` contain NaNs in
# different rows — TODO: drop NaN rows from the joined frame instead.
X_test = test[features].dropna()
y_test = test[target].dropna()

svr = LinearSVR(random_state=0)
# svr = SVR(kernel='linear', C=1e3)
# svr = SVR(kernel='poly', C=1e3, degree=2)

# train the model on the training set
svr.fit(X_train, y_train)
y_pred = svr.predict(X_test)

# Scatter of actual vs. predicted revenue on the test split.
plt.scatter(y_test, y_pred, color='blue')
plt.xlabel("Real revenue")
plt.ylabel("Predicted revenue")
plt.show()

# BUG FIX: the two scores were swapped — the variable labelled "train"
# was computed on the test split and vice versa, so the printed report
# mislabelled both numbers.
svr_score_train = svr.score(X_train, y_train)
svr_score_test = svr.score(X_test, y_test)
print("Training score: ", svr_score_train)
print("Testing score: ", svr_score_test)

# y = movies.revenue.values
# length = 4083
# y = y.reshape(-1, 1)
# x = preprocessing.scale(x)
# y = preprocessing.scale(y)
# regr = scr_rbfear_model.scr_rbfearRegression()
# regr.fit(x,y)
from sklearn.svm import LinearSVR

# Fit a linear support-vector regressor on the prepared splits.
SVR_model = LinearSVR()
SVR_model.fit(X_train, y_train)

# Predictions on the held-out data.
y_pred = SVR_model.predict(X_test)

# Report header followed by train/test R^2 scores.
print('\n\n\nSVM report')
print('Train score')
print(SVR_model.score(X_train, y_train))
print('Test score')
print(SVR_model.score(X_test, y_test))
print('-------------------------------------------------------')

# Error metrics on the test split, each followed by a separator rule.
for label, metric in (('Mean absolute error', mean_absolute_error),
                      ('Mean squared error', mean_squared_error)):
    print(label)
    print(metric(y_test, y_pred))
    print('-------------------------------------------------------')
# R-squared
# NOTE(review): script fragment — `test_data`, `labels`, `X_train`,
# `Y_train`, `time`, `np`, `pickle`, and `LinearSVR` are defined earlier
# in the file, and the final loop is cut off below this chunk.

# Flatten the nested test structure into per-sample rows; the last
# element of each test_data[i][j] record indexes its label.
# NOTE(review): the collapsed layout makes the nesting ambiguous — the
# Y_test.append is assumed to sit in the innermost loop; verify.
X_test = []
Y_test = []
for i in range(0, len(test_data)):
    for j in range(0, len(test_data[i])):
        for k in range(0, len(test_data[i][j]) - 1):
            X_test.append(test_data[i][j][k])
            Y_test.append(labels[test_data[i][j][-1]])
X_test = np.array(X_test)
Y_test = np.array(Y_test)

# Timestamped training of the linear SVR.
print(time.asctime(time.localtime(time.time())))
model = LinearSVR(loss='squared_epsilon_insensitive', verbose=1,
                  max_iter=1000)
#model = linear_model.Ridge(max_iter=5000,fit_intercept=True)
model.fit(X_train, Y_train)
print(time.asctime(time.localtime(time.time())))
print(model.score(X_train, Y_train))

# Persist the trained regressor to disk.
save_classifier = open("linear_model", "wb")
pickle.dump(model, save_classifier)
save_classifier.close()

# Write ground truth and predictions to side-by-side text files.
f1 = open('truthMean.txt', 'w')
f2 = open('linear_predict.txt', 'w')
for i in Y_test:
    f1.write(str(i))
    f1.write('\n')
# Snap each continuous prediction to the nearest of {1, 2/3, 1/3, 0}
# via the index of the smallest absolute distance.
for i in list(model.predict(X_test)):
    tmp = [abs(1 - i), abs(0.6666667 - i), abs(0.33333334 - i),
           abs(0 - i)]
    x = tmp.index(min(tmp))
    if x == 0:
        x = 1
        # (fragment truncated here)
print 'LinearSVC config:' print lsvc.get_params() lsvc.fit(smr_train.feature_matrix, smr_train.labels) lsvc_score_train = lsvc.score(smr_train.feature_matrix, smr_train.labels) print 'LinearSVC precision train: {}'.format(lsvc_score_train) lsvc_score_test = lsvc.score(smr_test.feature_matrix, smr_test.labels) print 'LinearSVC precision test: {}'.format(lsvc_score_test) print '' lsvr = LinearSVR() print 'LinearSVR config:' print svc.get_params() lsvr.fit(smr_train.feature_matrix, smr_train.labels) lsvr_score_train = svc.score(smr_train.feature_matrix, smr_train.labels) print 'LinearSVR precision train: {}'.format(lsvr_score_train) lsvr_score_test = lsvr.score(smr_test.feature_matrix, smr_test.labels) print 'LinearSVR precision test: {}'.format(lsvr_score_test) print '' nusvc = NuSVC() print 'NuSVC config:' print nusvc.get_params() nusvc.fit(smr_train.feature_matrix, smr_train.labels) nusvc_score_train = nusvc.score(smr_train.feature_matrix, smr_train.labels) print 'NuSVC precision train: {}'.format(nusvc_score_train) nusvc_score_test = nusvc.score(smr_test.feature_matrix, smr_test.labels) print 'NuSVC precision test: {}'.format(nusvc_score_test) print '' nusvr = NuSVR() print 'NuSVR config:'