def NuSVRRegressor(X_train, X_test, y_train, y_test):
    """Fit one NuSVR per target column (two-target regression) and log results.

    Trains two independent NuSVR models on columns 0 and 1 of ``y_train``,
    prints/collects validation metrics (on ``X_test``) and training metrics
    (on ``X_train``), then persists both models via ``logSave``.

    Refactor: the fit and predict/stack sequences were duplicated verbatim
    for each target and each split; they are extracted into private helpers.
    External interface and call order of the metric/log helpers unchanged.
    """
    def _fit_target(col):
        # One single-output NuSVR per target column.
        reg = NuSVR()
        reg.fit(X_train, y_train[:, col])
        return reg

    def _predict_pair(reg_a, reg_b, X):
        # Stack the two single-column predictions into an (n, 2) array.
        return np.hstack((reg_a.predict(X).reshape(-1, 1),
                          reg_b.predict(X).reshape(-1, 1)))

    reg1 = _fit_target(0)
    reg2 = _fit_target(1)

    # Validation metrics on the held-out split.
    y_pred = _predict_pair(reg1, reg2, X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    # Training metrics (in-sample).
    y_pred = _predict_pair(reg1, reg2, X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="NuSVRRegressor", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def NuSVRRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search one NuSVR per target column, evaluate, and log results.

    BUG FIX: the second regressor was previously taken from
    ``grid_reg1.best_estimator_`` (the FIRST target's search) instead of
    ``grid_reg2.best_estimator_``, so target 2 was fitted with target 1's
    winning hyper-parameters.  Corrected here.
    """
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]

    grid_values = {
        'nu': [value * 0.1 for value in range(1, 3)],
        'C': list(range(1, 3)),
        'kernel': ['poly', 'rbf'],
        'degree': list(range(1, 3)),
    }

    def _grid_search(targets):
        # Run one grid search, then refit the winning estimator on all data.
        search = GridSearchCV(
            NuSVR(), param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
            refit='r2', n_jobs=-1, cv=2, verbose=100)
        search.fit(X_train, targets)
        best = search.best_estimator_
        best.fit(X_train, targets)
        return search, best

    grid_reg1, reg1 = _grid_search(y_train1)
    grid_reg2, reg2 = _grid_search(y_train2)  # each target gets ITS OWN winner

    # Validation metrics on the held-out split.
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    # Training metrics (in-sample).
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # Merge: {hyper-parameter: [value chosen for target 1, value for target 2]}.
    best_params = {key: [best_params1[key], best_params2[key]]
                   for key in best_params1}

    saveBestParams(nameOfModel="NuSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="NuSVRRegressorGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
class MIKernelSVR(MIKernelSVM):
    """Multiple-instance NuSVR using a precomputed MI kernel Gram matrix.

    BUG FIX: the original bound ``X = map(np.asarray, X)``.  Under Python 3,
    ``map`` returns a one-shot iterator that was stored in ``self.fit_data``
    AND passed twice to ``self.mi_kernel(X, X)`` — exhausted after its first
    traversal, yielding empty data on the next use.  Bags are now
    materialized into lists; behavior under Python 2 is unchanged.
    """

    def __init__(self, **parameters):
        # C / nu are consumed here; everything else goes to the MI kernel.
        svr_params = {
            'kernel': 'precomputed',
            'max_iter': MAX_ITERS,
        }
        if 'C' in parameters:
            svr_params['C'] = parameters.pop('C')
        if 'nu' in parameters:
            svr_params['nu'] = parameters.pop('nu')
        self.estimator = NuSVR(**svr_params)

        # Get kernel name and pass remaining parameters to kernel
        mi_kernel_name = parameters.pop('kernel')
        self.mi_kernel = kernel.by_name(mi_kernel_name, **parameters)

    def fit(self, X, y):
        """Fit on a list of bags ``X`` (each coercible to an ndarray)."""
        X = [np.asarray(bag) for bag in X]  # materialize: reused below
        self.fit_data = X
        self.gram_matrix = self.mi_kernel(X, X)
        self.estimator.fit(self.gram_matrix, y)
        return self

    def predict(self, X=None):
        """Predict for bags ``X``; with ``X=None``, reuse the training Gram matrix."""
        if X is None:
            gram_matrix = self.gram_matrix
        else:
            X = [np.asarray(bag) for bag in X]
            gram_matrix = self.mi_kernel(X, self.fit_data)
        return self.estimator.predict(gram_matrix)
def predict(self, X):
    """Predict with the oneDAL-accelerated estimator when one was built,
    otherwise fall back to the stock scikit-learn implementation.

    The chosen backend is logged either way via ``get_patch_message``.
    """
    accelerated = hasattr(self, '_onedal_estimator')
    backend = "onedal" if accelerated else "sklearn"
    logging.info("sklearn.svm.NuSVR.predict: " + get_patch_message(backend))
    if accelerated:
        return self._onedal_estimator.predict(X)
    return sklearn_NuSVR.predict(self, X)
class _NuSVRImpl:
    """Thin adapter that stores hyper-parameters and delegates the
    fit/predict protocol to a wrapped ``Op`` estimator instance."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        # Forward labels only when supplied (some ops are unsupervised).
        args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*args)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
def applySVR(X_train, X_test, y_train, n_components, gamma):
    """Train an RBF-kernel NuSVR and print one prediction per test file.

    ``n_components`` is currently only echoed (the PCA step it belonged to
    is disabled); ``gamma`` is forwarded to the kernel.

    NOTE(review): relies on a module-level ``test_files`` sequence aligned
    with ``X_test`` — confirm against the caller.
    """
    print('n_components=', n_components, 'gamma=', gamma)

    regressor = NuSVR(C=100, cache_size=200, coef0=0.0, degree=3, gamma=gamma,
                      kernel='rbf', max_iter=-1, nu=0.5, shrinking=True,
                      tol=0.001, verbose=False)
    regressor.fit(X_train, y_train)

    # Print full arrays rather than numpy's truncated repr.
    np.set_printoptions(threshold=np.inf)
    print('number of test data', len(X_test))

    predictions = regressor.predict(X_test)
    print('\n\npredictions\n\n')
    for idx in range(len(predictions)):
        print(test_files[idx] + ", " + str(predictions[idx]))
def _test_evaluation(self, allow_slow): """ Test that the same predictions are made """ # Generate some smallish (some kernels take too long on anything else) random data x, y = [], [] for _ in range(50): cur_x1, cur_x2 = random.gauss(2,3), random.gauss(-1,2) x.append([cur_x1, cur_x2]) y.append( 1 + 2*cur_x1 + 3*cur_x2 ) input_names = ['x1', 'x2'] df = pd.DataFrame(x, columns=input_names) # Parameters to test kernel_parameters = [{}, {'kernel': 'rbf', 'gamma': 1.2}, {'kernel': 'linear'}, {'kernel': 'poly'}, {'kernel': 'poly', 'degree': 2}, {'kernel': 'poly', 'gamma': 0.75}, {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0':2}, {'kernel': 'sigmoid'}, {'kernel': 'sigmoid', 'gamma': 1.3}, {'kernel': 'sigmoid', 'coef0': 0.8}, {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5} ] non_kernel_parameters = [{}, {'C': 1}, {'C': 1.5, 'shrinking': True}, {'C': 0.5, 'shrinking': False, 'nu': 0.9}] # Test for param1 in non_kernel_parameters: for param2 in kernel_parameters: cur_params = param1.copy() cur_params.update(param2) cur_model = NuSVR(**cur_params) cur_model.fit(x, y) df['prediction'] = cur_model.predict(x) spec = scikit_converter.convert(cur_model, input_names, 'target') if is_macos() and macos_version() >= (10, 13): metrics = evaluate_regressor(spec, df) self.assertAlmostEquals(metrics['max_error'], 0) if not allow_slow: break if not allow_slow: break
def stacking(base_models, X, Y, T):
    """Two-level stacking ensemble.

    Each (name, estimator) pair in ``base_models`` is trained with 10-fold
    CV; its out-of-fold predictions become one column of the meta-training
    matrix, and its per-fold test predictions are averaged into one column
    of the meta-test matrix.  A NuSVR meta-learner blends the columns.

    Uses the legacy ``KFold(n, n_folds=...)`` API the file already relies on.
    """
    folds = list(KFold(len(Y), n_folds=10, random_state=0))
    n_models = len(base_models)
    meta_train = np.zeros((X.shape[0], n_models))
    meta_test = np.zeros((T.shape[0], n_models))

    for col, bm in enumerate(base_models):
        estimator = bm[1]
        fold_test_preds = np.zeros((T.shape[0], len(folds)))
        for fold_no, (fit_idx, hold_idx) in enumerate(folds):
            estimator.fit(X[fit_idx], Y[fit_idx])
            meta_train[hold_idx, col] = estimator.predict(X[hold_idx])
            fold_test_preds[:, fold_no] = estimator.predict(T)
        # Average the per-fold test predictions for this base model.
        meta_test[:, col] = fold_test_preds.mean(axis=1)

    blender = NuSVR(kernel='rbf')
    blender.fit(meta_train, Y)
    return blender.predict(meta_test)
class TestNuSVRIntegration(TestCase):
    """Integration tests comparing a PMML-loaded NuSVR against a freshly
    trained scikit-learn reference model on the same categorical dataset."""

    def setUp(self):
        # Load the categorical test data; one-hot encode it for the
        # scikit-learn reference model (PMML handles categories itself).
        df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
        Xte = df.iloc[:, 1:]
        Xenc = pd.get_dummies(Xte, prefix_sep='')
        yte = df.iloc[:, 0]
        self.test = (Xte, yte)
        self.enc = (Xenc, yte)
        pmml = path.join(BASE_DIR, '../models/svr-cat-pima.pmml')
        self.clf = PMMLNuSVR(pmml)
        # Reference model fitted on the encoded features with a binary target.
        self.ref = NuSVR()
        self.ref.fit(Xenc, yte == 'Yes')

    def test_fit_exception(self):
        # PMML-backed models are read-only: fit() must raise.
        with self.assertRaises(Exception) as cm:
            self.clf.fit(np.array([[]]), np.array([]))
        assert str(cm.exception) == 'Not supported.'

    def test_more_tags(self):
        # Estimator tags must match the plain sklearn NuSVR's.
        assert self.clf._more_tags() == NuSVR()._more_tags()

    def test_sklearn2pmml(self):
        # Export to PMML
        pipeline = PMMLPipeline([("regressor", self.ref)])
        pipeline.fit(self.enc[0], self.enc[1] == 'Yes')
        sklearn2pmml(pipeline, "svr-sklearn2pmml.pmml", with_repr=True)

        try:
            # Import PMML
            model = PMMLNuSVR(pmml='svr-sklearn2pmml.pmml')

            # Verify classification: round-tripped model must reproduce
            # the reference model's predictions.
            Xenc, _ = self.enc
            assert np.allclose(self.ref.predict(Xenc), model.predict(Xenc))
        finally:
            # Always clean up the exported file.
            remove("svr-sklearn2pmml.pmml")
#!/usr/bin/env python
# coding=utf-8
"""Train a default NuSVR on the Boston house-prices CSV and print the
mean absolute deviation of its predictions on the held-out split."""
import os

from sklearn.svm import NuSVR

from settings import DATA_DIR
from utils import load_data, split_dataset

FILENAME = os.path.join(DATA_DIR, 'boston_house_prices.csv')

if __name__ == '__main__':
    dataset = load_data(FILENAME)
    train_set, test_set = split_dataset(dataset)
    # Each row: feature values followed by the target in the last column.
    X = [train_data[:-1] for train_data in train_set]
    y = [train_data[-1] for train_data in train_set]
    X_test = [test_data[:-1] for test_data in test_set]
    X_classies = [test_data[-1] for test_data in test_set]
    clf = NuSVR()
    clf.fit(X, y)
    predicts = clf.predict(X_test)
    # Accumulate the absolute deviation between prediction and truth.
    bias = 0.0
    for (i, predict) in enumerate(predicts):
        bias += abs(predict - X_classies[i])
    # BUG FIX: the bare `print expr` statement is Python-2-only syntax.
    # Parenthesizing produces identical output on Python 2 and is valid
    # (the call form) on Python 3.
    print(bias / len(X_classies))
def r(n, dp=4):
    """Round ``n`` to ``dp`` decimal places (default 4)."""
    return round(n, dp)


# Load the training and evaluation splits; last column is the label.
data = pd.read_csv("../data/cardio_1dp.csv")
test = pd.read_csv("../data/new_cardio.csv")
x_train, y_train = data.iloc[:, :-1], data.iloc[:, -1]
x_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]

results = []
# Sweep the nu hyper-parameter; the regression output is thresholded at
# 0.5 to obtain binary predictions before classification scoring.
for NU in (0.3, 0.5, 0.7, 0.9):
    model = NuSVR(nu=NU, gamma="scale")
    model.fit(x_train, y_train)
    raw_pred = model.predict(x_test)
    y_pred = [1 if v >= 0.5 else 0 for v in raw_pred]
    scores = {
        "accuracy": r(accuracy_score(y_test, y_pred)),
        "precision": r(precision_score(y_test, y_pred)),
        "recall": r(recall_score(y_test, y_pred)),
        "f1 score": r(f1_score(y_test, y_pred)),
    }
    results.append((NU, scores))

# One line per nu value, padded for rough column alignment.
for n, s in results:
    print(n, " " * 27, s)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Reshape data to meet the requirements of the model: 1-D target vectors.
y_train = np.reshape(y_train, [-1])
y_test = np.reshape(y_test, [-1])

# Training the Nu SVR model
print('Building and training the Nu SVR model...')
clf = NuSVR(kernel='poly', gamma=0.0523125)
clf.fit(x_train, y_train)

# Gathering predictions from the model
ytrain = clf.predict(x_train)
ytest = clf.predict(x_test)

# Print performance metrics
print('---------Training-------')
print('Explained Variance Score', explained_variance_score(y_train, ytrain), 'Out of 1.00')
print('Mean Absolute Error', mean_absolute_error(y_train, ytrain))
# BUG FIX: was mean_squared_error(y_train, y_train) — comparing the target
# with itself always printed 0; compare against the predictions instead.
print('Mean Squared Error', mean_squared_error(y_train, ytrain))
print('Median Absolute Error', median_absolute_error(y_train, ytrain))
print('R2 Score', r2_score(y_train, ytrain), 'Out of 1.00')
print('Average Percent Error', (mean_absolute_error(y_train, ytrain)/np.average(y_train)))
print('---------Testing-------')
print('Explained Variance Score', explained_variance_score(y_test, ytest), 'Out of 1.00')
print('Mean Absolute Error', mean_absolute_error(y_test, ytest))
# BUG FIX: was mean_squared_error(y_test, y_test) — same self-comparison bug.
print('Mean Squared Error', mean_squared_error(y_test, ytest))
# print "Max shuf: ", np.max(ramp) # plt.plot(train_mask) # plt.show() # print np.shape(train_mask) # print np.shape(inp) #inp2 = np.vstack([inp, t]) X = np.array(X) Y = np.array(Y) print "X: ", np.shape(X) print "Y: ", np.shape(Y) print "isnans: ", np.sum(np.isnan(Y)) print "Fitting to S..." S.fit(X[train_mask,:],Y[train_mask]) print "Saving S " with open('S.pkl','wb') as file: pickle.dump(S, file, pickle.HIGHEST_PROTOCOL) # plt.figure() plt.plot(Y[test_mask],color='red',marker='.') plt.plot(S.predict(X[test_mask,:])) plt.show()
def runTcheby():
    """MOEA/D-style decomposition loop with a NuSVR surrogate model.

    Reads its entire configuration from the module-level ``param`` list,
    optimizes one sub-problem per weight direction using Tchebycheff
    aggregation, optionally cross-validates the surrogate (R2/MSE/MAE/MDAE)
    and writes objective/score logs via ``iot``.

    NOTE(review): only comments were added in this pass — the statement
    order is load-bearing (shared mutable state between helpers) and was
    left untouched.
    """
    global param, approx_pareto_front, archiveOK, NO_FILE_TO_WRITE

    ############################################################################
    # PARAMETER

    #clf = SVR(C=1.0, epsilon=0.1, kernel="rbf")
    # Surrogate model used to pre-filter offspring before true evaluation.
    clf = NuSVR(cache_size=2000, shrinking=True,verbose=True)
    clf2 = -1
    two_models_bool = False

    isReals = True
    # Unpack the flat 24-element configuration list pairwise.
    start_fct, nb_functions = param[0:2]
    nb_iterations, neighboring_size = param[2:4]
    init_decisions, problem_size = param[4:6]
    max_decisions_maj, delta_neighbourhood = param[6:8]
    CR, search_space = param[8:10]
    F, distrib_index_n = param[10:12]
    pm, operator_fct = param[12:14]
    nb_samples, training_neighborhood_size = param[14:16]
    strategy, file_to_write = param[16:18]
    filter_strat, free_eval = param[18:20]
    param_print_every, file_to_writeR2 = param[20:22]
    filenameDIR, filenameSCORE = param[22:24]

    nb_objectives = len(start_fct)

    #get separatly offspring operator fct
    crossover_fct, mutation_fct, repair_fct = operator_fct

    best_decisions = copy.deepcopy(init_decisions)

    sampling_param = [crossover_fct, mutation_fct, repair_fct, best_decisions, F, problem_size, CR, search_space, distrib_index_n, pm]

    ############################################################################
    # INITIALISATION

    qual_tools.resetGlobalVariables(filenameDIR, filenameSCORE, nb_iterations, nb_functions)

    eval_to.resetEval()

    #get the directions weight for both starting functions
    directions = dec.getDirections(nb_functions, nb_objectives)

    #init the neighboring constant
    nt.initNeighboringTab(nb_functions, neighboring_size, directions, nb_objectives)

    #giving global visibility to the best_decisions to get the result at the end
    approx_pareto_front = best_decisions

    #initial best decisions scores
    best_decisions_scores = [eval_to.free_eval(start_fct, best_decisions[i], problem_size) for i in range(nb_functions)]

    pop_size = nb_functions

    #current optimal scores for both axes
    z_opt_scores = gt.getMinTabOf(best_decisions_scores)

    eval_to.initZstar(z_opt_scores)

    #get the first training part of the item we will learn on
    model_directions = train_to.getDirectionsTrainingMatrix(directions)

    #if the data shall be write in a file
    writeOK = False
    if(file_to_write != NO_FILE_TO_WRITE):
        writeOK = True

    writeR2OK = False
    if(file_to_writeR2 != NO_FILE_TO_WRITE):
        writeR2OK = True

    ############################################################################
    # MAIN ALGORITHM

    if(writeOK):
        iot.printObjectives(file_to_write, eval_to.getNbEvals(), 0,best_decisions_scores, problem_size, nb_objectives)

    #set of all the solution evaluated
    all_decisions = copy.deepcopy(best_decisions)
    all_decisions_scores = copy.deepcopy(best_decisions_scores)
    all_len = nb_functions

    #IDs tab to allow a random course through the directions in the main loop
    id_directions = [i for i in range(nb_functions)]

    #iterations loop
    for itera in range(nb_iterations):

        #Update model: rebuild the surrogate's training set from all
        #evaluated decisions, then refit the NuSVR.
        training_inputs, training_outputs, training_set_size, training_scores = train_to.getTrainingSet(model_directions, all_decisions, all_decisions_scores ,eval_to.getZstar_with_decal(), strategy, nb_functions, training_neighborhood_size)
        print(len(training_outputs))

        clf.fit(training_inputs, training_outputs)

        # Optional surrogate quality report via manual 10-fold CV.
        if(writeR2OK):
            training_inputs_tcheby = eval_to.getManyTcheby(training_inputs, training_scores, eval_to.getZstar_with_decal(), training_set_size)

            random_index = numpy.arange(0,training_set_size)
            numpy.random.shuffle(random_index)

            n_folds = 10
            # Fold sizes: floor division, remainder spread over the first folds.
            folds_sizes = (training_set_size // n_folds) * numpy.ones(n_folds, dtype=numpy.int)
            folds_sizes[:training_set_size % n_folds] += 1

            training_inputs_array = numpy.array(training_inputs)
            training_tcheby_array = numpy.array(training_inputs_tcheby)

            R2_cv = []
            MSE_cv = []
            MAE_cv = []
            MDAE_cv = []

            clfCV = NuSVR()

            current = 0
            for fold_size in folds_sizes:
                start, stop = current, current + fold_size
                # Boolean mask: train on everything outside [start, stop).
                mask = numpy.ones(training_set_size, dtype=bool)
                mask[start:stop] = 0
                current = stop

                clfCV.fit(training_inputs_array[random_index[mask]], training_tcheby_array[random_index[mask]])

                test_fold_tcheby = training_tcheby_array[random_index[start:stop]]
                test_fold_predict = clfCV.predict(training_inputs_array[random_index[start:stop]])

                R2_cv .append(r2_score (test_fold_tcheby, test_fold_predict))
                MSE_cv .append(mean_squared_error (test_fold_tcheby, test_fold_predict))
                MAE_cv .append(mean_absolute_error (test_fold_tcheby, test_fold_predict))
                MDAE_cv.append(median_absolute_error(test_fold_tcheby, test_fold_predict))

            R2 = clf.score(training_inputs, training_outputs)
            MSE_cv_mean = numpy.mean(MSE_cv)
            RMSE_cv_mean = math.sqrt(MSE_cv_mean)
            MAE_cv_mean = numpy.mean(MAE_cv)
            MDAE_cv_mean = numpy.mean(MDAE_cv)
            R2_cv_mean = numpy.mean(R2_cv)

            iot.printR2(file_to_writeR2, eval_to.getNbEvals(), itera, R2, R2_cv_mean, MSE_cv_mean , MAE_cv_mean, MDAE_cv_mean, RMSE_cv_mean, problem_size, print_every=1)

        #random course through the directions
        random.shuffle(id_directions)

        #functions loop
        for f in id_directions:

            #get all the indice of neighbors of a function in a certain distance of f and include f in
            f_neighbors, current_neighbourhing_size = nt.getNeighborsOf(f, delta_neighbourhood)

            #get a list of offspring from the neighbors
            list_offspring = samp_to.extended_sampling(f, f_neighbors, sampling_param, nb_samples)

            #apply a filter on the offspring list and select the best one
            filter_param = [itera, f, clf, clf2, two_models_bool, f_neighbors, list_offspring, model_directions, start_fct, problem_size, eval_to.getZstar_with_decal(), best_decisions_scores, best_decisions, nb_objectives]
            best_candidate = filt_to.model_based_filtring(filter_strat, free_eval, filter_param)

            #evaluation of the newly made solution
            mix_scores = eval_to.eval(start_fct, best_candidate, problem_size)

            #MAJ of the z_star point
            has_changed = eval_to.min_update_Z_star(mix_scores, nb_objectives)

            #retraining of the model with the new z_star
            if(has_changed):
                train_to.updateTrainingZstar(eval_to.getZstar_with_decal())
                training_outputs = train_to.retrainSet(training_inputs, training_scores, eval_to.getZstar_with_decal(), training_set_size, nb_objectives)
                clf.fit(training_inputs, training_outputs)

            #add to training input
            new_input = []
            new_input.extend(best_candidate)
            all_decisions.append(new_input)
            all_decisions_scores.append(mix_scores)
            all_len += 1

            #boolean that is True if the offspring has been add to the archive
            added_to_S = False

            #count how many best decisions has been changed by the newly offspring
            cmpt_best_maj = 0

            #random course through the neighbors list
            random.shuffle(f_neighbors)

            #course through the neighbors list
            for j in f_neighbors:

                #stop if already max number of remplacement reach
                if(cmpt_best_maj >= max_decisions_maj):
                    break

                #compute g_tcheby
                #wj = (directions[0][j],directions[1][j])
                wj = [directions[obj][j] for obj in range(0,nb_objectives)]
                g_mix = eval_to.g_tcheby(wj, mix_scores, eval_to.getZstar_with_decal())
                g_best = eval_to.g_tcheby(wj, best_decisions_scores[j], eval_to.getZstar_with_decal())

                #if the g_tcheby of the new solution is less distant from the z_optimal solution than the current best solution of the function j
                if(g_mix < g_best):
                    cmpt_best_maj += 1
                    best_decisions[j] = best_candidate
                    best_decisions_scores[j] = mix_scores

                    #if we manage the archive and the solution have not been add already
                    if(archiveOK and not(added_to_S)):
                        arch_to.archivePut(best_candidate, mix_scores)
                        added_to_S = True

        #print("Update", itera, "done.")

        #if manage archive
        if(archiveOK):
            arch_to.maintain_archive()

        #if write the result in a file
        if(writeOK):
            iot.printObjectives(file_to_write, eval_to.getNbEvals(), itera+1, best_decisions_scores, problem_size, nb_objectives, print_every=param_print_every)
            continue

        #graphic update
        #yield arch_to.getArchiveScore(), best_decisions_scores, itera+1, eval_to.getNbEvals(), eval_to.getZstar_with_decal(), pop_size, isReals

    if(not free_eval and writeR2OK):
        qual_tools.computeQualityEvaluation()
        qual_tools.generateDiffPredFreeFile()

    return
# NOTE(review): this chunk begins mid-script; the first statements appear to
# belong to a per-segment feature-extraction loop whose header
# (for segment in ...) lies outside this view — confirm before editing.
y = seg['time_to_failure'].values[-1]
y_train.loc[segment, 'time_to_failure'] = y
# Per-segment summary statistics used as features.
X_train.loc[segment, 'ave'] = x.mean()
X_train.loc[segment, 'std'] = x.std()
X_train.loc[segment, 'max'] = x.max()
X_train.loc[segment, 'min'] = x.min()

# Standardize the features before fitting the support-vector model.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

svm = NuSVR()
svm.fit(X_train_scaled, y_train.values.flatten())
# In-sample predictions: this score measures training error, not generalization.
y_pred = svm.predict(X_train_scaled)

score = mean_absolute_error(y_train.values.flatten(), y_pred)
print(f'Score: {score:0.3f}')

# Build the test feature frame, one row per submission segment id.
submission = pd.read_csv(os.path.join(PATH,'sample_submission.csv'), index_col='seg_id')
X_test = pd.DataFrame(columns=X_train.columns, dtype=np.float64, index=submission.index)

# NOTE(review): loop body continues past the end of this chunk.
for seg_id in X_test.index:
    seg = pd.read_csv(os.path.join(PATH,'test/') + seg_id + '.csv')
    x = seg['acoustic_data'].values
def run_kernel(input_dir, verbose=False):
    """End-to-end LANL-earthquake-style pipeline: derive four summary
    features per 150k-row segment, fit a scaled NuSVR, report training MAE,
    and write per-segment test predictions to 'submission.csv'.

    ``input_dir`` is a path-like supporting the ``/`` operator (pathlib).
    """
    if verbose:
        print(os.listdir(input_dir))

    train = pd.read_csv(
        input_dir / 'train.csv',
        dtype={'acoustic_data': np.int16, 'time_to_failure': np.float64})
    if verbose:
        print(train.head())
    pd.options.display.precision = 15
    print(train.head())

    # Create a training file with simple derived features
    rows = 150_000
    segments = int(np.floor(train.shape[0] / rows))

    X_train = pd.DataFrame(index=range(segments), dtype=np.float64,
                           columns=['ave', 'std', 'max', 'min'])
    y_train = pd.DataFrame(index=range(segments), dtype=np.float64,
                           columns=['time_to_failure'])

    # One row of summary statistics per fixed-size segment; the target is
    # the time-to-failure at the END of the segment.
    for segment in tqdm(range(segments)):
        seg = train.iloc[segment * rows:segment * rows + rows]
        x = seg['acoustic_data'].values
        y = seg['time_to_failure'].values[-1]
        y_train.loc[segment, 'time_to_failure'] = y
        X_train.loc[segment, 'ave'] = x.mean()
        X_train.loc[segment, 'std'] = x.std()
        X_train.loc[segment, 'max'] = x.max()
        X_train.loc[segment, 'min'] = x.min()

    if verbose:
        print(X_train.head())

    # Standardize features; the same scaler is reused for the test set below.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)

    svm = NuSVR()
    svm.fit(X_train_scaled, y_train.values.flatten())
    # In-sample predictions — the MAE below is a training score.
    y_pred = svm.predict(X_train_scaled)

    if verbose:
        plt.figure(figsize=(6, 6))
        plt.scatter(y_train.values.flatten(), y_pred)
        plt.xlim(0, 20)
        plt.ylim(0, 20)
        plt.xlabel('actual', fontsize=12)
        plt.ylabel('predicted', fontsize=12)
        plt.plot([(0, 0), (20, 20)], [(0, 0), (20, 20)])
        plt.show()

    score = mean_absolute_error(y_train.values.flatten(), y_pred)
    if verbose:
        print(f'Score: {score:0.3f}')

    # Same four features for each test segment listed in the sample submission.
    submission = pd.read_csv(
        input_dir / 'sample_submission.csv', index_col='seg_id')
    X_test = pd.DataFrame(columns=X_train.columns, dtype=np.float64,
                          index=submission.index)

    for seg_id in X_test.index:
        seg = pd.read_csv(input_dir / ('test/' + seg_id + '.csv'))
        x = seg['acoustic_data'].values
        X_test.loc[seg_id, 'ave'] = x.mean()
        X_test.loc[seg_id, 'std'] = x.std()
        X_test.loc[seg_id, 'max'] = x.max()
        X_test.loc[seg_id, 'min'] = x.min()

    X_test_scaled = scaler.transform(X_test)
    submission['time_to_failure'] = svm.predict(X_test_scaled)
    submission.to_csv('submission.csv')
def nu_svr(dataframe, kernel='linear', target=None, drop_features=None,
           without_outliers=False, split=0.2):
    """Train a NuSVR on the numeric columns of ``dataframe`` and evaluate it.

    The tail ``split`` fraction of rows is held out as the test set.

    Returns:
        (feature_importance, results, y_result, reg) — coefficients (linear
        kernel only), a Series of metrics, a test/prediction frame, and the
        fitted estimator.

    Fixes:
      * mutable default argument ``drop_features=[]`` replaced with ``None``
        sentinel (backward compatible);
      * each 10-fold ``cross_val_score`` was previously computed TWICE
        (once for the mean, once for the raw scores) — now computed once
        and reused, halving the cross-validation cost with identical
        results (KFold without shuffling is deterministic).
    """
    drop_features = [] if drop_features is None else drop_features

    # Remove non-numerical and undesired features from dataframe
    dataframe = dataframe.loc[:, dataframe.dtypes != 'object']
    dataframe = dataframe.drop(drop_features, axis=1)

    # Transform data into columns and define target variable
    numerical_features = dataframe.loc[:, dataframe.columns != target]
    X = np.nan_to_num(numerical_features.to_numpy())
    y = np.nan_to_num(dataframe[target].to_numpy())

    # Split the data into training/testing sets (test set = tail of the data)
    testsplit = round(split * X.shape[0])
    X_train = X[:-testsplit]
    X_test = X[-testsplit:]
    y_train = y[:-testsplit]
    y_test = y[-testsplit:]

    # Train the NuSVR model
    reg = NuSVR(kernel=kernel, C=1.0, nu=0.1)
    reg.fit(X_train, y_train)

    if kernel == 'linear':
        # Coefficients are only defined for the linear kernel.
        feature_importance = pd.Series(
            reg.coef_[0], index=numerical_features.columns)
    else:
        feature_importance = pd.Series()

    # Prediction with trained model
    y_pred = reg.predict(X_test)

    results = pd.Series()
    results['Train mean'] = np.mean(y_train)
    results['Train std'] = np.std(y_train)
    results['Test mean'] = np.mean(y_test)
    results['Test std'] = np.std(y_test)
    results['Prediction mean'] = np.mean(y_pred)
    results['Prediction std'] = np.std(y_pred)
    results['Mean Squared Error'] = mean_squared_error(y_test, y_pred)
    results['Mean Absolute Error'] = mean_absolute_error(y_test, y_pred)
    results['R2 score'] = r2_score(y_test, y_pred)
    results['Explained variance score'] = explained_variance_score(
        y_test, y_pred)

    # Run each cross-validation exactly once and reuse the fold scores.
    cv_r2 = cross_val_score(reg, X, y, cv=10, scoring="r2")
    cv_ev = cross_val_score(reg, X, y, cv=10, scoring="explained_variance")
    results['Cross-val R2 score (mean)'] = np.mean(cv_r2)
    results['Cross-val R2 scores'] = cv_r2
    results['Cross-val explained_variance score (mean)'] = np.mean(cv_ev)
    results['Cross-val explained_variance scores'] = cv_ev

    y_result = pd.DataFrame({'y_test': y_test, 'y_pred': y_pred})

    return feature_importance, results, y_result, reg
# NOTE(review): this chunk starts mid-statement — it closes a
# grid_search_results3.append([...]) call whose opening lies outside this
# view.  Entry layout appears to be [params_dict, oof_buffer, test_buffer].
'C': C
}, np.zeros(len(X_train_scaled)), np.zeros(len(X_test_scaled))])

scores3_fold = []
print('Training model with')
print(grid_search_results3[-1][0])

# K-fold training of NuSVR for the current (nu, C) pair.
for fold_n, (train_index, valid_index) in enumerate(folds.split(X_train_scaled)):
    X_train, X_valid, X_test, Y_train, Y_valid = get_train_valid_test_samples(
        X_train_scaled, Y_tr, X_test_scaled, train_index, valid_index)
    Y_train = Y_train.squeeze()
    Y_valid = Y_valid.squeeze()
    model = NuSVR(gamma='scale', nu=nu, C=C, tol=0.01)
    model.fit(X_train, Y_train)
    Y_pred_valid = model.predict(X_valid).reshape(-1, )
    scores3_fold.append(mean_absolute_error(Y_valid, Y_pred_valid))
    print('Fold {0}. MAE: {1}.'.format(fold_n + 1, scores3_fold[-1]))

    # Accumulate out-of-fold and summed test predictions for this entry.
    grid_search_results3[-1][1][valid_index] = Y_pred_valid
    y_pred = model.predict(X_test).reshape(-1, )
    grid_search_results3[-1][2] += y_pred

scores3_total = np.mean(scores3_fold)
# Average the accumulated test predictions over all folds.
grid_search_results3[-1][2] /= n_fold
grid_search_results3[-1].append(scores3_total)
grid_search_results3[-1].append('NuSVR')
grid_search_results3[-1].append([])

# Keep the best-scoring hyper-parameters and their predictions.
if scores3_total < min_score3:
    min_score3 = scores3_total
    best_params3 = grid_search_results3[-1][0]
    oof[-1] = grid_search_results3[-1][1]
    prediction[-1] = grid_search_results3[-1][2]
featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(
    correctedSeries, depth, horizon)

# Timestamped folder collects one HTML plot per forecast horizon.
outputFolderName = "Outputs/Outputs" + datetime.now().strftime(
    "%Y_%m_%d_%H_%M_%S")
os.mkdir(outputFolderName)

for i in range(horizon):
    # Train different models for different horizon
    # Train the model
    #model = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))])
    #model = NuSVR(kernel='linear', nu=1.0)
    model = NuSVR(kernel="rbf", nu=1.0, tol=1e-10, gamma=1.0)
    #model = RidgeCV()
    model.fit(featureVectors, targetVectors[:, i])
    # In-sample predictions (no held-out split here).
    predictedTargetVectors = model.predict(featureVectors)

    # Plot the actual and predicted
    actual = targetVectors[:, i]
    predicted = predictedTargetVectors

    # Descale back to the original units.
    actual = util.scalingFunction.inverse_transform(actual)
    predicted = util.scalingFunction.inverse_transform(predicted)

    outplot = outputPlot.OutputPlot(
        outputFolderName + "/Prediction_horizon" + str(i + 1) + ".html",
        "Facebook Fans Change - Linear Regression", "Taylor Swift", "Time",
        "Output")
    outplot.setXSeries(np.arange(1, targetVectors.shape[0]))
    outplot.setYSeries('Actual Output', actual)
# NOTE(review): chunk is truncated here — the predicted series and the
# createOutput() call appear after this view.
# NOTE(review): fragment of a cross-validation loop; data_mean/data_std,
# label_mean/label_std, validate_idx/train_idx, `params`, `pred_on_train`
# and `record` are all defined outside this view — confirm before editing.
# Standardize using statistics computed on the training fold only.
train_data_fold = (train_data_fold - data_mean) / data_std
train_label_fold = (train_label_fold - label_mean) / label_std
test_data_fold = (test_data_fold - data_mean) / data_std

validate_data_fold = train_data_fold[validate_idx]
validate_label_fold = train_label_fold[validate_idx]
train_data_fold = train_data_fold[train_idx]
train_label_fold = train_label_fold[train_idx]

# train
model = NuSVR(**params)
model.fit(
    train_data_fold,
    train_label_fold,
)
train_pred_fold = model.predict(train_data_fold)
# Multiplying the MAE by label_std rescales the error to label units.
train_error = mean_absolute_error(train_label_fold, train_pred_fold) * label_std

# pred on train
validate_pred_fold = model.predict(validate_data_fold)
validate_error = mean_absolute_error(validate_label_fold, validate_pred_fold) * label_std
# De-standardize the predictions and clamp to [0, 50].
validate_pred_fold = validate_pred_fold * label_std + label_mean
validate_pred_fold = np.clip(validate_pred_fold, 0, 50)
validate_pred_fold = pd.DataFrame(validate_pred_fold)
validate_pred_fold['idx'] = validate_idx
pred_on_train.append(validate_pred_fold)
record.append((train_error, validate_error))
# NOTE(review): the print call below is truncated by the chunk boundary.
print('Train Error:{}\nValidate Error:{}'.format(train_error,
# Form feature and target vectors featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(correctedSeries, depth, horizon) outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S") os.mkdir(outputFolderName) for i in range(horizon): # Train different models for different horizon # Train the model #model = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))]) #model = NuSVR(kernel='linear', nu=1.0) model = NuSVR(kernel="rbf", nu=1.0, tol=1e-10, gamma=1.0) #model = RidgeCV() model.fit(featureVectors, targetVectors[:, i]) predictedTargetVectors = model.predict(featureVectors) # Plot the actual and predicted actual = targetVectors[:, i] predicted = predictedTargetVectors # Descale actual = util.scalingFunction.inverse_transform(actual) predicted = util.scalingFunction.inverse_transform(predicted) outplot = outputPlot.OutputPlot(outputFolderName + "/Prediction_horizon"+str(i+1)+".html", "Facebook Fans Change - Linear Regression", "Taylor Swift", "Time", "Output") outplot.setXSeries(np.arange(1, targetVectors.shape[0])) outplot.setYSeries('Actual Output', actual) outplot.setYSeries('Predicted Output', predicted) outplot.createOutput()
# Report RMSE of the previous section's predictions; y_test and y_pred
# come from outside this chunk.
trainrms = sqrt(mean_squared_error(y_test, y_pred))
print("RFPCA : trainrms {}".format(trainrms))

plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_pred)
plt.xlabel('ytest', fontsize=12)
plt.ylabel('RF', fontsize=12)
plt.show()

from sklearn.svm import NuSVR

# Grid-search NuSVR over C and nu, then score on the held-out set.
NuSVRreg = NuSVR(C=65.0, nu=.99)
params = {'C': [45, 55, 65], 'nu': [1 / i for i in range(1, 10)]}
NuSVRreg = GridSearchCV(NuSVRreg, params)
NuSVRreg.fit(X_data, Y_data)

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_NuSVRreg = NuSVRreg.predict(X_test)
trainrms = sqrt(mean_squared_error(y_test, y_NuSVRreg))
print("NuSVRreg : trainrms {}".format(trainrms))

plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_NuSVRreg)
plt.xlabel('ytest', fontsize=12)
# NOTE(review): axis label 'RF' looks like a copy-paste leftover — this
# plot shows NuSVR predictions; confirm intent before changing.
plt.ylabel('RF', fontsize=12)
plt.show()

#=============================================================================
# end RF
#=============================================================================

#=============================================================================
# start XGS
#=============================================================================
def train_model(X, y, X_test, model_type=None, params=None, folds=folds, feat_importance=False): preds_oof_all = np.zeros(len(X)) # out-of-fold predictions preds_test_all = np.zeros(len(X_test)) # test set predictions errors_oof_all = [] # mean absolute error for out-of-fold predictions feat_imp_all = pd.DataFrame() # ---------- Iterate over folds ---------- for fold_i, (train_i, oof_i) in enumerate(folds.split(X)): x_train, x_oof = X.iloc[train_i], X.iloc[oof_i] y_train, y_oof = y.iloc[train_i], y.iloc[oof_i] # ---------- Fit model and predict in current fold ---------- if model_type == "lgb": model = lgb.LGBMRegressor(**params, n_estimators=50_000, n_jobs=-1) model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_oof, y_oof)], eval_metric="mae", verbose=10_000, early_stopping_rounds=200) preds_oof = model.predict(x_oof) preds_test = model.predict(X_test, num_iteration=model.best_iteration_) if model_type == "xgb": xgb_train = xgb.DMatrix(x_train, y_train, feature_names=X.columns) xgb_oof = xgb.DMatrix(x_oof, y_oof, feature_names=X.columns) xgb_oof_nolabel = xgb.DMatrix(x_oof, feature_names=X.columns) xgb_test = xgb.DMatrix(X_test, feature_names=X.columns) model = xgb.train(dtrain=xgb_train, num_boost_round=20_000, evals=[(xgb_train, "train"), (xgb_oof, "valid_data")], early_stopping_rounds=200, verbose_eval=500, params=params) preds_oof = model.predict(xgb_oof_nolabel, ntree_limit=model.best_ntree_limit) preds_test = model.predict(xgb_test, ntree_limit=model.best_ntree_limit) if model_type == "nusvr": model = NuSVR(**params) model.fit(x_train, y_train) preds_oof = model.predict(x_oof) preds_test = model.predict(X_test) if model_type == "krr": model = KernelRidge(**params) model.fit(x_train, y_train) preds_oof = model.predict(x_oof).reshape( -1, ) # reshape from (n, 1) to (n, ) preds_test = model.predict(X_test).reshape(-1, ) # ---------- Save errors and predictions from fold ---------- preds_oof_all[ oof_i] = preds_oof # set out-of-fold preds to right index 
preds_test_all += preds_test # sum the predictions (to be averaged later over folds) error_oof = mean_absolute_error(y_oof, preds_oof) errors_oof_all.append(error_oof) # append errors from current fold if (model_type == "nusvr" or model_type == "krr"): print(f"Fold {fold_i + 1}. MAE: {error_oof:.4f}." ) # fold evaluation for sklearn models # ---------- Feature importance in fold for LGB ---------- if (model_type == "lgb" and feat_importance == True): feat_imp_fold = pd.DataFrame() feat_imp_fold["feature"] = X.columns feat_imp_fold["importance"] = model.feature_importances_ feat_imp_fold["fold"] = fold_i + 1 feat_imp_all = pd.concat([feat_imp_all, feat_imp_fold], axis=0) # ---------- Aggregate errors and predictions over all folds ---------- preds_test_all /= num_folds # average predictions mean_error = np.mean(errors_oof_all) std_error = np.std(errors_oof_all) print(f"CV error mean: {mean_error:.4f}, std: {std_error:.4f}") # ---------- Feature importance over all folds ---------- if (model_type == "lgb" and feat_importance == True): feat_imp_all["importance"] /= num_folds # average importances top_30_feats = feat_imp_all[[ "feature", "importance" ]].groupby("feature").mean().sort_values("importance", ascending=False)[0:30].index imp_values_top_30 = feat_imp_all.loc[feat_imp_all["feature"].isin( top_30_feats)] imp_values_top_30 = imp_values_top_30.sort_values( "importance", ascending=False ) # importance values from each of the 5 folds for the top-30 features ie 150 values plt.figure(figsize=(13, 7)) sns.barplot("importance", "feature", data=imp_values_top_30) plt.title("LGB best features (avg over folds)") return preds_oof_all, preds_test_all
# Load one experiment folder's CSVs, fit a default NuSVR, and record wall-clock
# training+prediction time to a per-folder CSV.
os.chdir(folder)
name_folder = folder.split("/")[6]  # NOTE(review): assumes a fixed path depth — fragile
train_data = np.array(pd.read_csv('train_data.csv', sep=';'))
test_data = np.array(pd.read_csv('test_data.csv', sep=';'))
train_labels = np.array(pd.read_csv('train_labels.csv', sep=';'))
test_labels = np.array(pd.read_csv('test_labels.csv', sep=';'))
inicio = time.time()
# import the regression model
from sklearn.svm import NuSVR
regression = NuSVR().fit(train_data, train_labels)
# predict
predictions_labels = regression.predict(test_data)
fim = time.time()
df_time = pd.DataFrame({'Execution Time:': [fim - inicio]})
output_path = os.path.join(
    '/home/isadorasalles/Documents/Regressao/Nu_svr', 'time_' + name_folder)
df_time.to_csv(output_path, sep=';')
from sklearn import metrics
# NOTE(review): this statement is truncated in this chunk — the dict literal
# continues past the visible source.
df_metrics = pd.DataFrame({
    'Mean Absolute Error':
    [metrics.mean_absolute_error(test_labels, predictions_labels)],
    'Mean Squared Error':
trainingSeries, testingSeries = util.splitIntoTrainingAndTestingSeries(correctedSeries, horizon) # Learning Process - Start # Form the feature and target vectors featureVectors, targetVectors = formFeatureAndTargetVectors(trainingSeries) # Fit a model model = NuSVR(kernel="rbf", gamma=1.0, nu=1.0, tol=1e-15) model.fit(featureVectors, targetVectors[:, 0]) # Learning Process - End # Predict for testing data points testingFeatureVectors, testingTargetVectors = formFeatureAndTargetVectors(testingSeries) predictedTrainingOutputData = model.predict(testingFeatureVectors) # Predicted and actual Series actualSeries = testingSeries predictedSeries = pd.Series(data=predictedTrainingOutputData.flatten(), index=testingSeries.index) # Learning Process - End # Step 5 - Descale the series actualSeries = util.descaleSeries(actualSeries) predictedSeries = util.descaleSeries(predictedSeries) outputFolderName = "Outputs/"+str(profileName)+"Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S") util.plotSeries(outputFolderName, [actualSeries, predictedSeries], ["Actual Series", "Predicted Series"], "Facebook Fans Change", "Outlier Detection")
# Learning Process - Start # Parameters depth = 100 # Form feature and target vectors featureVectors, targetVectors = util.formContinousFeatureAndTargetVectorsWithoutBias(correctedSeries, depth) featureVectors, targetVectors = util.formFeatureAndTargetVectors(correctedSeries, depth) # # Train using linear regression #model = SVR(kernel="linear") model = NuSVR(nu=1.0, kernel="linear") model.fit(featureVectors, targetVectors[:, 0]) predictedTrainingOutputData = model.predict(featureVectors) targetVectors = targetVectors # Predicted and actual Series actualSeries = pd.Series(data=targetVectors.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:]) predictedSeries = pd.Series(data=predictedTrainingOutputData.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:]) # Learning Process - End # Step 5 - Descale the series actualSeries = util.descaleSeries(actualSeries) predictedSeries = util.descaleSeries(predictedSeries) outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
def train_svr_cpu(X, Y, X_eval, c, kernel='linear', nu=0.5):
    """Fit a Nu-SVR on (X, Y) and return its predictions for X_eval.

    Args:
        X, Y: training features and targets.
        X_eval: samples to predict after fitting.
        c: regularization strength passed as NuSVR's C.
        kernel: kernel name forwarded to NuSVR (default 'linear').
        nu: upper bound on the fraction of margin errors (default 0.5).

    Returns:
        Array of predicted values for X_eval.
    """
    model = NuSVR(kernel=kernel, C=c, max_iter=100000, nu=nu, gamma='auto')
    model.fit(X, Y)
    return model.predict(X_eval)
# Scale the training features with a previously-fitted scaler and fit a
# default NuSVR baseline; plot its in-sample fit against the training target.
X_train_scaled = scaler.transform(X_train)
print(X_train_scaled)

# In[6]:

#apply model
#from sklearn.isotonic import IsotonicRegression
#from sklearn.linear_model import ElasticNet
#from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn import svm
from sklearn.svm import NuSVR
model = NuSVR()  # default hyper-parameters (rbf kernel, nu=0.5)
model.fit(X_train_scaled, y_train.values.flatten())
y_pred = model.predict(X_train_scaled)  # prediction on the training set itself

# In[7]:

#plt.figure(figsize=(6, 6))
#plt.scatter(y_train.values, y_pred)
#plt.xlim(0, 20)
#plt.ylim(0, 20)
#plt.xlabel('actual', fontsize=12)
#plt.ylabel('predicted', fontsize=12)
#plt.plot([(0, 0), (20, 20)], [(0, 0), (20, 20)])
#plt.show()
plt.figure(figsize=(16, 8))
plt.plot(y_train, color='b', label='y_train')
plt.plot(y_pred, color='gold', label='naive_model')
plt.legend()
def _gerarPlotFit(list_index_real, list_y_real, list_index_previsto,
                  list_y_previsto, list_index_real_original, x_predict_original,
                  list_index_previsto_original, list_y_previsto_original,
                  list_y_real_original, isFit, df_norm):
    """Build (and cache) several trend-fit curves and plot the linear-SVR one.

    When `isFit` is true, fits polynomial, weighted-polynomial, straight-line
    and NuSVR trend curves over the real/predicted point lists; results are
    cached in the module-level `cach_fit` tuple and reused when
    `x_predict_original.sum() == 0`. Returns the list of matplotlib legend
    handles actually plotted (currently only the linear-SVR fit).

    NOTE(review): indentation reconstructed from a collapsed source line —
    confirm the `if (isFit):` body extent against the original file.
    """
    global cach_fit
    # Plotting FIT
    if (isFit):
        # 1-based x positions for each point list
        x_fit_real = [x + 1 for x in np.arange(len(list_index_real))]
        y_fit_real = list_y_real
        x_fit_previsto = np.asarray(
            [x + 1 for x in np.arange(len(list_index_previsto))],
            dtype=np.int32)
        y_fit_previsto = list_y_previsto
        x_fit_previsto_original = np.asarray(
            [x + 1 for x in np.arange(len(list_index_previsto_original))],
            dtype=np.int32)
        y_fit_previsto_original = list_y_previsto_original
        x_fit_real_original = np.asarray(
            [x + 1 for x in np.arange(len(list_index_real_original))],
            dtype=np.int32)
        y_fit_real_original = list_y_real_original
        list_x = np.arange(len(df_norm.index))
        # skip the first 40% of points for the "parcela" (partial) fit
        parcela_x = (0 if len(x_fit_real) == 1 else ceil(
            len(x_fit_real) * 0.4))
        #print(parcela_x)
        coefs_linear_reais = np.polyfit(
            x_fit_real,
            y_fit_real,
            1,
        )
        coefs_linear_previsto = np.polyfit(x_fit_previsto, y_fit_previsto, 1)
        coefs_linear_previsto_parcela = np.polyfit(
            x_fit_previsto[parcela_x:len(x_fit_previsto)],
            y_fit_previsto[parcela_x:len(x_fit_previsto)], 1)
        # weight later points more heavily (sqrt of reversed x)
        coefs_linear_previsto_peso = np.polyfit(x_fit_previsto,
                                                y_fit_previsto,
                                                1,
                                                w=np.sqrt(
                                                    x_fit_previsto[::-1]))
        if (x_predict_original.sum() == 0 and len(cach_fit) != 0):
            # reuse the previously computed fits from the cache
            ffit_reais = cach_fit[0]
            ffit_peso = cach_fit[1]
            ffit = cach_fit[2]
            fit_reta_previsto = cach_fit[3]
            fit_svr = cach_fit[4]
            fit_reta_previsto_parcela = cach_fit[5]
            fit_svr_ply = cach_fit[6]
            list_x = cach_fit[7]
        else:
            ffit_reais = np.poly1d(coefs_linear_reais)
            ffit_peso = np.poly1d(coefs_linear_previsto_peso)
            ffit = np.poly1d(coefs_linear_previsto)
            fit_reta_previsto_parcela = np.poly1d(
                coefs_linear_previsto_parcela)

            # FIT with the reduced straight-line equation [y = ax + b]
            # NOTE(review): the intercept term adds x_fit_real_original[0]
            # rather than y_fit_real_original[0] — looks like a bug; confirm.
            fit_reta_previsto = [
                ((y_fit_real_original[-1] - y_fit_real_original[0]) /
                 (x_fit_real_original[-1] - x_fit_real_original[0])) *
                (x - x_fit_real_original[0]) + x_fit_real_original[0]
                for x in list_x
            ]
            # linear and rbf NuSVR trend fits over the original predicted points
            svr_nu = NuSVR(kernel='linear', C=1, gamma='scale', nu=0.9)
            svr_nu_poly = NuSVR(kernel='rbf', C=1, gamma='scale', nu=0.9)
            svr_nu.fit((x_fit_previsto_original.reshape(-1, 1)),
                       y_fit_previsto_original)
            svr_nu_poly.fit((x_fit_previsto_original.reshape(-1, 1)),
                            y_fit_previsto_original)
            fit_svr = svr_nu.predict(list_x.reshape(-1, 1))
            fit_svr_ply = svr_nu_poly.predict(list_x.reshape(-1, 1))
            cach_fit = (ffit_reais, ffit_peso, ffit, fit_reta_previsto,
                        fit_svr, fit_reta_previsto_parcela, fit_svr_ply,
                        list_x)

        # alternative fit curves kept for reference, currently disabled:
        # legend_fit_real,= plt.plot(df_norm.index, ffit_reais(list_x), color="orange", linestyle='--', label="FIT [pontos reais]")
        # legend_fit_previsto, = plt.plot(df_norm.index, ffit_peso(list_x), color="red", linestyle='--', label= "FIT [pontos reais + último ponto previsto] PESO (SQRT)")
        # legend_fit_previsto_sem_peso, = plt.plot(df_norm.index, ffit(list_x), color="g", linestyle='--', label= "FIT [pontos reais + último ponto previsto] Sem peso")
        # legend_fit_previsto_reta, = plt.plot(df_norm.index,fit_reta_previsto, color="chocolate", linestyle='--', label= "FIT Equacao da Reta")
        # legend_fit_previsto_sem_peso_parcela, = plt.plot(df_norm.index, fit_reta_previsto_parcela(list_x), color="slategray", linestyle='--', label= "FIT [pontos reais + último ponto previsto - parcela] Sem peso")
        legend_fit_previsto_svr, = plt.plot(df_norm.index,
                                            fit_svr,
                                            color="mediumvioletred",
                                            linestyle='--',
                                            label="FIT SVR [Linear]")
        #legend_fit_previsto_svr_poly, = plt.plot(df_norm.index,fit_svr_ply, color="red", linestyle='--', label= "FIT SVR [Poly]")
        # list_legend_fit = [legend_fit_previsto, legend_fit_previsto_sem_peso, legend_fit_real, legend_fit_previsto_reta,legend_fit_previsto_svr,legend_fit_previsto_sem_peso_parcela]
        list_legend_fit = [legend_fit_previsto_svr]
        return list_legend_fit
class NuSvrClass:
    """
    Name : NuSVR
    Attribute : None
    Method : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # algorithm name (used for the saved-model filename)
        self._name = 'nusvr'
        # base path: parent directory of this file's directory
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))
        # suppress warning messages
        warnings.filterwarnings('ignore')
        # load raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")
        # boolean masks splitting train (<= 2017) and test (>= 2018) years
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)
        # build training windows
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # build test windows
        self._x_test, self._y_test = self.preprocessing(data[self._y])
        # declare and fit the model
        self._model = NuSVR(nu=0.5, cache_size=100)
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing: sliding 7-day window over the temperature column.
    def preprocessing(self, data):
        # features
        x = []
        # labels
        y = []
        # window length (7 days)
        base_interval = 7
        # temperatures
        temps = list(data["temperature"])
        for i in range(len(temps)):
            if i < base_interval:
                continue
            # label: temperature on day i; features: the 7 preceding days
            y.append(temps[i])
            xa = []
            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Plain prediction on the held-out (>= 2018) windows.
    def predict(self, save_img=False, show_chart=False):
        # predict
        y_pred = self._model.predict(self._x_test)
        # score
        score = r2_score(self._y_test, y_pred)
        # report (coef_/intercept_ only exist for linear-kernel models)
        if hasattr(self._model, 'coef_') and hasattr(self._model,
                                                     'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')
        print(f'Score = {score}')
        # optionally save a chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)
        # predictions & score
        return [list(y_pred), score]

    # CV prediction (Cross Validation) — implement per project as needed.
    def predict_by_cv(self):
        return False

    # GridSearchCV prediction — not implemented.
    def predict_by_gs(self):
        pass

    # Save or refresh the persisted model.
    def save_model(self, renew=False):
        if not renew:
            # first save
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # archive the existing model (timestamp suffix), then save anew
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path +
                    f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')

    # Save a chart comparing test labels (red) with predictions (blue).
    def save_chart_image(self, data, show_chart):
        # figure size
        plt.figure(figsize=(15, 10), dpi=100)
        # labels
        plt.plot(self._y_test, c='r')
        # predicted values
        plt.plot(data, c='b')
        # save as image
        plt.savefig('./chart_images/tenki-kion-lr.png')
        # optionally display the chart
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
def nusvrtrain(x, y, pre_x):
    """Scale x/pre_x via datscater, fit NuSVR(C=5.0) on (x, y), predict pre_x.

    Args:
        x: training features (rescaled by datscater before fitting).
        y: training targets.
        pre_x: features to predict (rescaled alongside x).

    Returns:
        Array of predictions for the rescaled pre_x.
    """
    x, pre_x = datscater(x, pre_x)
    regressor = NuSVR(C=5.0)
    regressor.fit(x, y)
    return regressor.predict(pre_x)
Kt = K[trainIdx][:, trainIdx] #n = len(trainIdx) #nv = len(valIdx) #nx = len(testIdx) #Train Support Vector Regression # C = 10.^(-2:1:2); C = [0.1] for c in C: print("C = %f" % c) tic = time.time() svr = NuSVR(C=c, kernel='precomputed') svr.fit(Kt, trainLabels) toc = time.time() print("train cost %f s" % (toc - tic)) trainScores = svr.predict(Kt) mseTrain = np.mean((trainLabels - trainScores)**2) valScores = svr.predict(Kv) mseVal = np.mean((valLabels - valScores)**2) testScores = svr.predict(Kx) mseTest = np.mean((testLabels - testScores)**2) print('Train MSE : %g' % mseTrain) print('val MSE : %g' % mseVal) print('Test MSE : %g' % mseTest) # use all samples to train svr = NuSVR(C=c, kernel='precomputed') svr.fit(K, labels) joblib.dump(svr, 'svr.pkl', compress=3)
# Learning Process - Start # Parameters depth = 100 # Form feature and target vectors featureVectors, targetVectors = util.formContinousFeatureAndTargetVectorsWithoutBias( correctedSeries, depth) featureVectors, targetVectors = util.formFeatureAndTargetVectors( correctedSeries, depth) # # Train using linear regression #model = SVR(kernel="linear") model = NuSVR(nu=1.0, kernel="linear") model.fit(featureVectors, targetVectors[:, 0]) predictedTrainingOutputData = model.predict(featureVectors) targetVectors = targetVectors # Predicted and actual Series actualSeries = pd.Series(data=targetVectors.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:]) predictedSeries = pd.Series( data=predictedTrainingOutputData.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:]) # Learning Process - End # Step 5 - Descale the series actualSeries = util.descaleSeries(actualSeries) predictedSeries = util.descaleSeries(predictedSeries)
# Split off the two decay-coefficient targets, scale the features, fit one
# NuSVR per target, print in-sample metrics, and plot partial dependence.
y_train = data[col_heading[-2:]].values  # the two target columns
X = data.drop([
    'GT_Compressor_decay_state_coefficient',
    'GT_Turbine_decay_state_coefficient'
], axis=1)
y1 = pd.DataFrame(data=y_train[:, 0], columns=[final_cols[-2]])  # NOTE(review): y1/y2 unused below
y2 = pd.DataFrame(data=y_train[:, 1], columns=[final_cols[-1]])
scaler = StandardScaler()
# NOTE(review): X_train is not defined anywhere in this chunk and the freshly
# built X above is never used — this likely should be scaler.fit_transform(X);
# confirm against the full file.
X_train = scaler.fit_transform(X_train)
scaled_X = pd.DataFrame(data=X_train, columns=final_cols[:-2])
reg1 = NuSVR()
reg1.fit(X_train, y_train[:, 0])  # model for the compressor coefficient
reg2 = NuSVR()
reg2.fit(X_train, y_train[:, 1])  # model for the turbine coefficient
y_pred1 = reg1.predict(X=X_train)
y_pred2 = reg2.predict(X=X_train)
y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
printMetrics(y_true=y_train, y_pred=y_pred)  # in-sample (training) metrics
metrics = getMetrics(y_true=y_train, y_pred=y_pred)
fig1, ax1 = plt.subplots(figsize=(15, 15))
myplot1 = plot_partial_dependence(reg1,
                                  scaled_X,
                                  final_cols[:-2],
                                  ax=ax1,
                                  n_jobs=-1)
myplot1.plot()
fig1.savefig('GT_Compressor_decay_state_coefficient.png')
fig2, ax2 = plt.subplots(figsize=(15, 15))
# NOTE(review): call truncated in this chunk — continues past the visible source.
myplot2 = plot_partial_dependence(reg2, scaled_X,
# 'gamma':'auto' # } # val=cross_val_score(NuSVR(**params),X_train_scaled,y_tr,scoring='neg_mean_absolute_error',cv=5).mean() # return val # nusvr_bo=BayesianOptimization(nusvr_cv_score,params_nusvr) # nusvr_bo.maximize() # max_param_nusvr=nusvr_bo.max['params'] # max_param_nusvr['gamma']='auto' # print(max_param_nusvr) # opt_nusvr_reg=NuSVR(**max_param_nusvr) # NuSVR(gamma='scale', nu=0.7, tol=0.01, C=1.0) opt_nusvr_reg = NuSVR(gamma='scale', nu=0.9, C=10.0, tol=0.01) opt_nusvr_reg.fit(X_train_scaled, y_tr) y_pred = opt_nusvr_reg.predict(X_test_scaled).reshape(-1, ) # params_nusvr_grid={ # 'nu':[0.1,0.3,0.5,0.7,1], # 'C':[1], # 'tol':[0.01,0.03,0.05,0.07,0.1] # } # nusvr_reg=NuSVR() # grid_search_nusvr=GridSearchCV(nusvr_reg,params_nusvr_grid,cv=4,scoring='neg_mean_absolute_error') # grid_search_nusvr.fit(X_train_scaled,y_tr) # print(grid_search_nusvr.cv_results_) # y_pred=grid_search_nusvr.predict(X_test_scaled).reshape(-1,) submission['time_to_failure'] = y_pred # submission['time_to_failure'] = prediction_lgb_stack submission.to_csv('nusvr.csv', index=False)
# Run trained SVR on full record: # wdw_beg = 1 wdw_end = 15000 regr_idx = 0 fetal_lead_wdw = np.zeros([(wdw_end - wdw_beg),]) mat_lead_wdw = np.zeros([(wdw_end - wdw_beg),]) cwt_wdw = np.zeros([(wdw_end - wdw_beg), n_feats]) for wdw_idx in np.arange(wdw_beg, wdw_end): fetal_lead_wdw[regr_idx] = fetal_lead[wdw_idx] mat_lead_wdw[regr_idx] = mat_lead[wdw_idx] blef = cwt_trans[wdw_idx - cwt_wdw_lth_h : wdw_idx + cwt_wdw_lth_h -1, :] cwt_wdw[regr_idx,:] = blef.flatten() regr_idx = regr_idx +1 z_rbf = nusv_res.predict(cwt_wdw) figz = make_subplots(rows=2, cols=1) figz.append_trace(go.Scatter(x = x_idxs, y = mat_lead_wdw), row=1, col=1) figz.append_trace(go.Scatter(x = x_idxs, y = fetal_lead_wdw), row=2, col=1) figz.append_trace(go.Scatter(x = x_idxs, y = z_rbf), row=2, col=1) figz.show() # plt.plot(fetal_lead[500:700]) # plt.plot(svr_rbf.predict(cwt_trans[500:700,:])) arf = 12
# Form feature and target vectors featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(trainingSeries, depth, horizon) predictedSeries = [] outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S") os.mkdir(outputFolderName) for i in range(horizon): # Train different models for different horizon # Train the model model = NuSVR(kernel="rbf", gamma=1.0, nu=1.0, C=4, tol=1e-10) model.fit(featureVectors, targetVectors[:, i]) # Now, predict the future featureVector = availableSeries.values[-depth:].reshape(1,depth) predicted = model.predict(featureVector) predictedSeries.append(predicted) predictedSeries = pd.Series(data=np.array(predictedSeries).flatten(), index=testingSeries.index) # Descale the series predictedSeries = util.descaleSeries(predictedSeries) actualSeries = util.descaleSeries(testingSeries) # Plot the results details = profileName + "_horizon_" + str(horizon) + "_depth_" + str(depth) util.plotSeries("Outputs/Outputs_" + str(datetime.now()) + details, [actualSeries, predictedSeries], ["Actual Output", "Predicted Output"], "Facebook Fans Change - "+profileName, "Taylor Swift")