Example #1
def NuSVRRegressor(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = NuSVR()
    reg1.fit(X_train, y_train1)
    reg2 = NuSVR()
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="NuSVRRegressor",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
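Example #1 above fits one NuSVR per target column by hand. The same pattern can be expressed with scikit-learn's MultiOutputRegressor wrapper; a minimal sketch on synthetic data (the shapes and targets here are illustrative assumptions, not part of the original example):

import numpy as np
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import NuSVR

rng = np.random.RandomState(0)
X = rng.rand(100, 5)                    # 100 samples, 5 features (made up)
y = np.column_stack([X.sum(axis=1),     # two target columns, as in Example #1
                     X[:, 0] - X[:, 1]])

# one NuSVR is cloned and fitted per target column internally
multi = MultiOutputRegressor(NuSVR()).fit(X, y)
y_pred = multi.predict(X)               # shape (100, 2)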
Example #2
def NuSVRRegressorGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = NuSVR()
    reg2 = NuSVR()
    grid_values = {
        'nu': [value * 0.1 for value in range(1, 3)],
        'C': list(range(1, 3)),
        'kernel': ['poly', 'rbf'],
        'degree': list(range(1, 3))
    }

    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="NuSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="NuSVRRegressorGS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
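Example #2 passes a list of scorers and refit='r2', so best_estimator_ is selected by R2 while the other metrics stay available in cv_results_. A minimal sketch of that multi-metric pattern (the toy data and grid are illustrative assumptions):

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import NuSVR

rng = np.random.RandomState(0)
X, y = rng.rand(60, 4), rng.rand(60)

gs = GridSearchCV(NuSVR(),
                  param_grid={'nu': [0.2, 0.5], 'C': [1, 2]},
                  scoring=['neg_mean_squared_error', 'r2'],
                  refit='r2',   # best_estimator_ is chosen by r2
                  cv=2)
gs.fit(X, y)
print(gs.best_params_)
print(gs.cv_results_['mean_test_neg_mean_squared_error'])  # non-refit metric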
Example #3
class MIKernelSVR(MIKernelSVM):

    def __init__(self, **parameters):
        svr_params = {
            'kernel' : 'precomputed',
            'max_iter': MAX_ITERS,
        }
        if 'C' in parameters:
            svr_params['C'] = parameters.pop('C')
        if 'nu' in parameters:
            svr_params['nu'] = parameters.pop('nu')
        self.estimator = NuSVR(**svr_params)

        # Get kernel name and pass remaining parameters to kernel
        mi_kernel_name = parameters.pop('kernel')
        self.mi_kernel = kernel.by_name(mi_kernel_name, **parameters)

    def fit(self, X, y):
        X = [np.asarray(bag) for bag in X]  # a list, not a map iterator, so it can be reused
        self.fit_data = X
        self.gram_matrix = self.mi_kernel(X, X)
        self.estimator.fit(self.gram_matrix, y)
        return self

    def predict(self, X=None):
        if X is None:
            gram_matrix = self.gram_matrix
        else:
            X = [np.asarray(bag) for bag in X]
            gram_matrix = self.mi_kernel(X, self.fit_data)
        return self.estimator.predict(gram_matrix)
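MIKernelSVR above relies on NuSVR's kernel='precomputed' mode: fit receives the train/train Gram matrix and predict receives the test/train Gram matrix. A self-contained sketch of that same pattern with an ordinary RBF kernel (synthetic data, purely illustrative):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.svm import NuSVR

rng = np.random.RandomState(0)
X_train, X_test = rng.rand(50, 3), rng.rand(10, 3)
y_train = rng.rand(50)

# fit on the train/train Gram matrix ...
model = NuSVR(kernel='precomputed').fit(rbf_kernel(X_train, X_train), y_train)
# ... and predict on the test/train Gram matrix, as MIKernelSVR.predict does
y_pred = model.predict(rbf_kernel(X_test, X_train))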
Example #5
def predict(self, X):
    if hasattr(self, '_onedal_estimator'):
        logging.info("sklearn.svm.NuSVR.predict: " +
                     get_patch_message("onedal"))
        return self._onedal_estimator.predict(X)
    else:
        logging.info("sklearn.svm.NuSVR.predict: " +
                     get_patch_message("sklearn"))
        return sklearn_NuSVR.predict(self, X)
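Example #5 comes from the scikit-learn-intelex patching layer: predict dispatches to the oneDAL backend when a patched estimator is present and falls back to stock scikit-learn otherwise. Assuming the sklearnex package is installed, enabling that dispatch typically looks like this (a sketch, not part of the example itself):

from sklearnex import patch_sklearn
patch_sklearn()                  # swaps in accelerated implementations

from sklearn.svm import NuSVR    # now resolves to the patched estimator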
Example #6
class _NuSVRImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Example #7
def applySVR(X_train, X_test, y_train, n_components, gamma):
    print('n_components=', n_components, 'gamma=', gamma)
    """To apply PCA to reduce time. I experimented with quite a values of this.
	Around 150 is the number of features/components that seem to work good for this problem.
	Anyways, a better idea would be check it up again manually by experimenting."""

    # pca = PCA(n_components=n_components).fit(X_train)

    # X_train = pca.transform(X_train)
    # X_test = pca.transform(X_test)

    # clf = NuSVR(C=100.0, cache_size=200, coef0=0.0, degree=3, gamma=gamma,
    #    kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
    #    verbose=False)

    clf = NuSVR(C=100,
                cache_size=200,
                coef0=0.0,
                degree=3,
                gamma=gamma,
                kernel='rbf',
                max_iter=-1,
                nu=0.5,
                shrinking=True,
                tol=0.001,
                verbose=False)

    clf.fit(X_train, y_train)
    np.set_printoptions(threshold=np.inf)
    #print(len(clf.support_), clf.support_)

    print('number of test data', len(X_test))
    y_rbf = clf.predict(X_test)
    print('\n\npredictions\n\n')
    # print(y_rbf)
    for i in range(len(y_rbf)):
        # print(X_test[i])
        print(test_files[i] + ", " + str(y_rbf[i]))

    # print('predictions made are as follows.')
    # for i in range(len(y_rbf)):
    # 	print(y_rbf[i], y_test[i])

    #for y in y_rbf:
    #	print(y, end=' ')
#
    """These are the set of methods which are useful metrics. The paper used rmse value as one of the metrics.
Example #8
    def _test_evaluation(self, allow_slow):
        """
        Test that the same predictions are made
        """

        # Generate some smallish (some kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2,3), random.gauss(-1,2)
            x.append([cur_x1, cur_x2])
            y.append( 1 + 2*cur_x1 + 3*cur_x2 )

        input_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=input_names)

        # Parameters to test
        kernel_parameters = [
            {},
            {'kernel': 'rbf', 'gamma': 1.2},
            {'kernel': 'linear'},
            {'kernel': 'poly'},
            {'kernel': 'poly', 'degree': 2},
            {'kernel': 'poly', 'gamma': 0.75},
            {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
            {'kernel': 'sigmoid'},
            {'kernel': 'sigmoid', 'gamma': 1.3},
            {'kernel': 'sigmoid', 'coef0': 0.8},
            {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5},
        ]
        non_kernel_parameters = [{}, {'C': 1}, {'C': 1.5, 'shrinking': True},
                                 {'C': 0.5, 'shrinking': False, 'nu': 0.9}]

        # Test
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)

                cur_model = NuSVR(**cur_params)
                cur_model.fit(x, y)
                df['prediction'] = cur_model.predict(x)

                spec = scikit_converter.convert(cur_model, input_names, 'target')

                if is_macos() and macos_version() >= (10, 13):
                    metrics = evaluate_regressor(spec, df)
                    self.assertAlmostEqual(metrics['max_error'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #9
def stacking(base_models, X, Y, T):
    models = base_models
    # modern sklearn API; the original used the legacy KFold(len(Y), n_folds=10)
    folds = list(KFold(n_splits=10).split(X))
    S_train = np.zeros((X.shape[0], len(models)))
    S_test = np.zeros((T.shape[0], len(models)))
    for i, bm in enumerate(models):
        clf = bm[1]
        S_test_i = np.zeros((T.shape[0], len(folds)))
        for j, (train_idx, test_idx) in enumerate(folds):
            X_train = X[train_idx]
            y_train = Y[train_idx]
            X_holdout = X[test_idx]
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_holdout)[:]
            S_train[test_idx, i] = y_pred
            S_test_i[:, j] = clf.predict(T)[:]
        S_test[:, i] = S_test_i.mean(1)
    nuss = NuSVR(kernel='rbf')
    nuss.fit(S_train, Y)
    yp = nuss.predict(S_test)[:]
    return yp
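Since stacking() reads bm[1], each base model is expected to be a (name, estimator) pair. A hedged usage sketch on synthetic data (the base models chosen here are assumptions for illustration):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X = rng.rand(100, 4)    # training features (synthetic)
Y = rng.rand(100)       # training targets
T = rng.rand(20, 4)     # test features

base_models = [('ridge', Ridge()), ('svr', SVR())]
test_preds = stacking(base_models, X, Y, T)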
Example #10
class TestNuSVRIntegration(TestCase):
    def setUp(self):
        df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
        Xte = df.iloc[:, 1:]
        Xenc = pd.get_dummies(Xte, prefix_sep='')
        yte = df.iloc[:, 0]
        self.test = (Xte, yte)
        self.enc = (Xenc, yte)

        pmml = path.join(BASE_DIR, '../models/svr-cat-pima.pmml')
        self.clf = PMMLNuSVR(pmml)

        self.ref = NuSVR()
        self.ref.fit(Xenc, yte == 'Yes')

    def test_fit_exception(self):
        with self.assertRaises(Exception) as cm:
            self.clf.fit(np.array([[]]), np.array([]))

        assert str(cm.exception) == 'Not supported.'

    def test_more_tags(self):
        assert self.clf._more_tags() == NuSVR()._more_tags()

    def test_sklearn2pmml(self):
        # Export to PMML
        pipeline = PMMLPipeline([("regressor", self.ref)])
        pipeline.fit(self.enc[0], self.enc[1] == 'Yes')
        sklearn2pmml(pipeline, "svr-sklearn2pmml.pmml", with_repr=True)

        try:
            # Import PMML
            model = PMMLNuSVR(pmml='svr-sklearn2pmml.pmml')

            # Verify classification
            Xenc, _ = self.enc
            assert np.allclose(self.ref.predict(Xenc), model.predict(Xenc))

        finally:
            remove("svr-sklearn2pmml.pmml")
Example #11
#!/usr/bin/env python
# coding=utf-8

import os
from sklearn.svm import NuSVR
from settings import DATA_DIR
from utils import load_data, split_dataset

FILENAME = os.path.join(DATA_DIR, 'boston_house_prices.csv')

if __name__ == '__main__':
    dataset = load_data(FILENAME)
    train_set, test_set = split_dataset(dataset)
    X = [train_data[:-1] for train_data in train_set]
    y = [train_data[-1] for train_data in train_set]
    X_test = [test_data[:-1] for test_data in test_set]
    X_classies = [test_data[-1] for test_data in test_set]

    clf = NuSVR()
    clf.fit(X, y)
    predicts = clf.predict(X_test)
    bias = 0.0
    for (i, predict) in enumerate(predicts):
        bias += abs(predict - X_classies[i])
    print(bias / len(X_classies))
Example #12

def r(n, dp=4):
    return round(n, dp)


data = pd.read_csv("../data/cardio_1dp.csv")
test = pd.read_csv("../data/new_cardio.csv")
x_train, y_train = data.iloc[:, :-1], data.iloc[:, -1]
x_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]

results = []
# training starts here
for NU in [0.3, 0.5, 0.7, 0.9]:
    model = NuSVR(nu=NU, gamma="scale")
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    y_pred = [0 if i < 0.5 else 1 for i in y_pred]
    scores = {
        "accuracy": r(accuracy_score(y_test, y_pred)),
        "precision": r(precision_score(y_test, y_pred)),
        "recall": r(recall_score(y_test, y_pred)),
        "f1 score": r(f1_score(y_test, y_pred))
    }

    results.append((NU, scores))

for n, s in results:
    print(n, " " * 27, s)
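Example #12 turns NuSVR's continuous output into class labels by thresholding at 0.5. For a genuinely binary target, NuSVC is the native classification counterpart; a minimal sketch on synthetic data (features and labels are made up):

import numpy as np
from sklearn.svm import NuSVC

rng = np.random.RandomState(0)
X = rng.rand(80, 6)
y = (X[:, 0] > 0.5).astype(int)   # synthetic binary labels

clf = NuSVC(nu=0.5, gamma='scale').fit(X, y)
pred = clf.predict(X)             # class labels directly, no threshold needed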
Example #13
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Reshape targets to the 1-D shape the model expects
y_train = np.reshape(y_train, [-1])
y_test = np.reshape(y_test, [-1])


# Training the Nu SVR model
print('Building and training the Nu SVR model...')
clf = NuSVR(kernel='poly', gamma=0.0523125)
clf.fit(x_train, y_train)

# Gathering predictions from the model
ytrain = clf.predict(x_train)
ytest = clf.predict(x_test)

# Print performance metrics
print('---------Training-------')
print('Explained Variance Score', explained_variance_score(y_train, ytrain), 'Out of 1.00')
print('Mean Absolute Error', mean_absolute_error(y_train, ytrain))
print('Mean Squared Error', mean_squared_error(y_train, ytrain))
print('Median Absolute Error', median_absolute_error(y_train, ytrain))
print('R2 Score', r2_score(y_train, ytrain), 'Out of 1.00')
print('Average Percent Error', (mean_absolute_error(y_train, ytrain)/np.average(y_train)))

print('---------Testing-------')
print('Explained Variance Score', explained_variance_score(y_test, ytest), 'Out of 1.00')
print('Mean Absolute Error', mean_absolute_error(y_test, ytest))
print('Mean Squared Error', mean_squared_error(y_test, ytest))
Example #14
# print "Max shuf: ", np.max(ramp)


# plt.plot(train_mask)
# plt.show()
# print np.shape(train_mask)

#    print np.shape(inp)
    #inp2 = np.vstack([inp, t])
X = np.array(X)
Y = np.array(Y)
print "X: ", np.shape(X)
print "Y: ", np.shape(Y)
print "isnans: ", np.sum(np.isnan(Y))

print "Fitting to S..."
S.fit(X[train_mask,:],Y[train_mask])

print "Saving S "

with open('S.pkl','wb') as file:
    pickle.dump(S, file, pickle.HIGHEST_PROTOCOL)

# plt.figure()
plt.plot(Y[test_mask],color='red',marker='.')
plt.plot(S.predict(X[test_mask,:]))
plt.show()



Example #15
def runTcheby():
    global param, approx_pareto_front, archiveOK, NO_FILE_TO_WRITE

    ############################################################################
    # PARAMETER

    #clf = SVR(C=1.0, epsilon=0.1, kernel="rbf")
    clf = NuSVR(cache_size=2000, shrinking=True,verbose=True)
    clf2 = -1
    two_models_bool = False

    isReals = True
    start_fct, nb_functions                = param[0:2]
    nb_iterations, neighboring_size        = param[2:4]
    init_decisions, problem_size           = param[4:6]
    max_decisions_maj, delta_neighbourhood = param[6:8]
    CR, search_space                       = param[8:10]
    F, distrib_index_n                     = param[10:12]
    pm, operator_fct                       = param[12:14]
    nb_samples, training_neighborhood_size = param[14:16]
    strategy, file_to_write                = param[16:18]
    filter_strat, free_eval                = param[18:20]
    param_print_every, file_to_writeR2     = param[20:22]
    filenameDIR, filenameSCORE             = param[22:24]


    nb_objectives = len(start_fct)

    # unpack the offspring operator functions
    crossover_fct, mutation_fct, repair_fct = operator_fct

    best_decisions = copy.deepcopy(init_decisions)

    sampling_param = [crossover_fct, mutation_fct, repair_fct, best_decisions, F, problem_size, CR, search_space, distrib_index_n, pm]


    ############################################################################
    # INITIALISATION

    qual_tools.resetGlobalVariables(filenameDIR, filenameSCORE, nb_iterations, nb_functions)

    eval_to.resetEval()

    #get the directions weight for both starting functions
    directions = dec.getDirections(nb_functions, nb_objectives)

    #init the neighboring constant
    nt.initNeighboringTab(nb_functions, neighboring_size, directions, nb_objectives)

    #giving global visibility to the best_decisions to get the result at the end
    approx_pareto_front = best_decisions

    #initial best decisions scores
    best_decisions_scores = [eval_to.free_eval(start_fct, best_decisions[i], problem_size) for i in range(nb_functions)]

    pop_size = nb_functions

    #current optimal scores for both axes
    z_opt_scores = gt.getMinTabOf(best_decisions_scores)

    eval_to.initZstar(z_opt_scores)

    #get the first training part of the item we will learn on
    model_directions = train_to.getDirectionsTrainingMatrix(directions)

    # whether the data should be written to a file
    writeOK = False
    if(file_to_write != NO_FILE_TO_WRITE):
        writeOK = True

    writeR2OK = False
    if(file_to_writeR2 != NO_FILE_TO_WRITE):
        writeR2OK = True

    ############################################################################
    # MAIN ALGORITHM

    if(writeOK):
        iot.printObjectives(file_to_write, eval_to.getNbEvals(), 0,best_decisions_scores, problem_size, nb_objectives)

    # set of all evaluated solutions
    all_decisions        = copy.deepcopy(best_decisions)
    all_decisions_scores = copy.deepcopy(best_decisions_scores)
    all_len = nb_functions

    # index table allowing a random pass through the directions in the main loop
    id_directions = [i for i in range(nb_functions)]

    #iterations loop
    for itera in range(nb_iterations):
        #Update model
        training_inputs, training_outputs, training_set_size, training_scores = train_to.getTrainingSet(model_directions, all_decisions, all_decisions_scores ,eval_to.getZstar_with_decal(), strategy, nb_functions, training_neighborhood_size)
        print(len(training_outputs))
        clf.fit(training_inputs, training_outputs)
        if(writeR2OK):
            training_inputs_tcheby      = eval_to.getManyTcheby(training_inputs, training_scores, eval_to.getZstar_with_decal(), training_set_size)

            random_index = numpy.arange(0,training_set_size)
            numpy.random.shuffle(random_index)
            n_folds = 10
            folds_sizes = (training_set_size // n_folds) * numpy.ones(n_folds, dtype=int)  # numpy.int was removed in NumPy 1.24
            folds_sizes[:training_set_size % n_folds] += 1

            training_inputs_array = numpy.array(training_inputs)
            training_tcheby_array = numpy.array(training_inputs_tcheby)

            R2_cv = []
            MSE_cv = []
            MAE_cv = []
            MDAE_cv = []

            clfCV = NuSVR()

            current = 0
            for fold_size in folds_sizes:
                start, stop = current, current + fold_size
                mask = numpy.ones(training_set_size, dtype=bool)
                mask[start:stop] = 0
                current = stop

                clfCV.fit(training_inputs_array[random_index[mask]], training_tcheby_array[random_index[mask]])

                test_fold_tcheby = training_tcheby_array[random_index[start:stop]]
                test_fold_predict = clfCV.predict(training_inputs_array[random_index[start:stop]])

                R2_cv  .append(r2_score             (test_fold_tcheby, test_fold_predict))
                MSE_cv .append(mean_squared_error   (test_fold_tcheby, test_fold_predict))
                MAE_cv .append(mean_absolute_error  (test_fold_tcheby, test_fold_predict))
                MDAE_cv.append(median_absolute_error(test_fold_tcheby, test_fold_predict))

            R2 = clf.score(training_inputs, training_outputs)
            MSE_cv_mean = numpy.mean(MSE_cv)
            RMSE_cv_mean = math.sqrt(MSE_cv_mean)
            MAE_cv_mean = numpy.mean(MAE_cv)
            MDAE_cv_mean = numpy.mean(MDAE_cv)
            R2_cv_mean = numpy.mean(R2_cv)

            iot.printR2(file_to_writeR2, eval_to.getNbEvals(), itera,  R2, R2_cv_mean, MSE_cv_mean , MAE_cv_mean, MDAE_cv_mean, RMSE_cv_mean, problem_size, print_every=1)

        #random course through the directions
        random.shuffle(id_directions)

        #functions loop
        for f in id_directions:

            # get the indices of all neighbors within a certain distance of f, including f itself
            f_neighbors, current_neighbourhing_size = nt.getNeighborsOf(f, delta_neighbourhood)

            #get a list of offspring from the neighbors
            list_offspring = samp_to.extended_sampling(f, f_neighbors, sampling_param, nb_samples)

            #apply a filter on the offspring list and select the best one
            filter_param = [itera, f, clf, clf2, two_models_bool, f_neighbors, list_offspring, model_directions, start_fct, problem_size, eval_to.getZstar_with_decal(), best_decisions_scores, best_decisions, nb_objectives]
            best_candidate = filt_to.model_based_filtring(filter_strat, free_eval, filter_param)

            #evaluation of the newly made solution
            mix_scores = eval_to.eval(start_fct, best_candidate, problem_size)

            # update of the z_star point
            has_changed = eval_to.min_update_Z_star(mix_scores, nb_objectives)

            #retraining of the model with the new z_star
            if(has_changed):
                train_to.updateTrainingZstar(eval_to.getZstar_with_decal())
                training_outputs = train_to.retrainSet(training_inputs, training_scores, eval_to.getZstar_with_decal(), training_set_size, nb_objectives)
                clf.fit(training_inputs, training_outputs)

            #add to training input
            new_input = []
            new_input.extend(best_candidate)
            all_decisions.append(new_input)
            all_decisions_scores.append(mix_scores)
            all_len += 1

            # True once the offspring has been added to the archive
            added_to_S = False

            # count how many best decisions have been replaced by the new offspring
            cmpt_best_maj = 0

            #random course through the neighbors list
            random.shuffle(f_neighbors)

            #course through the neighbors list
            for j in f_neighbors:

                # stop if the maximum number of replacements has been reached
                if(cmpt_best_maj >= max_decisions_maj):
                    break


                #compute g_tcheby
                #wj = (directions[0][j],directions[1][j])
                wj = [directions[obj][j] for obj in range(0,nb_objectives)]
                g_mix = eval_to.g_tcheby(wj, mix_scores, eval_to.getZstar_with_decal())
                g_best = eval_to.g_tcheby(wj, best_decisions_scores[j], eval_to.getZstar_with_decal())


                # if the new solution's g_tcheby is closer to the z-optimal point than function j's current best
                if(g_mix < g_best):
                    cmpt_best_maj += 1
                    best_decisions[j] = best_candidate
                    best_decisions_scores[j] = mix_scores

                    # if we manage the archive and the solution has not been added yet
                    if(archiveOK and not(added_to_S)):
                        arch_to.archivePut(best_candidate, mix_scores)
                        added_to_S = True

        #print("Update", itera, "done.")

        # archive maintenance, if enabled
        if(archiveOK):
            arch_to.maintain_archive()

        # write the results to a file, if enabled
        if(writeOK):
            iot.printObjectives(file_to_write, eval_to.getNbEvals(), itera+1, best_decisions_scores, problem_size, nb_objectives, print_every=param_print_every)
            continue
        #graphic update
        #yield arch_to.getArchiveScore(), best_decisions_scores, itera+1, eval_to.getNbEvals(), eval_to.getZstar_with_decal(), pop_size, isReals
    if(not free_eval and writeR2OK):
        qual_tools.computeQualityEvaluation()
        qual_tools.generateDiffPredFreeFile()
    return
Example #16
    y = seg['time_to_failure'].values[-1]
    
    y_train.loc[segment, 'time_to_failure'] = y
    
    X_train.loc[segment, 'ave'] = x.mean()
    X_train.loc[segment, 'std'] = x.std()
    X_train.loc[segment, 'max'] = x.max()
    X_train.loc[segment, 'min'] = x.min()

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

svm = NuSVR()
svm.fit(X_train_scaled, y_train.values.flatten())
y_pred = svm.predict(X_train_scaled)

score = mean_absolute_error(y_train.values.flatten(), y_pred)
print(f'Score: {score:0.3f}')

submission = pd.read_csv(os.path.join(PATH,'sample_submission.csv'), index_col='seg_id')

X_test = pd.DataFrame(columns=X_train.columns, dtype=np.float64, index=submission.index)



for seg_id in X_test.index:
    seg = pd.read_csv(os.path.join(PATH, 'test/') + seg_id + '.csv')
    x = seg['acoustic_data'].values
    # same per-segment features as in the training loop above
    X_test.loc[seg_id, 'ave'] = x.mean()
    X_test.loc[seg_id, 'std'] = x.std()
    X_test.loc[seg_id, 'max'] = x.max()
    X_test.loc[seg_id, 'min'] = x.min()
    
def run_kernel(input_dir, verbose=False):
    if verbose:
        print(os.listdir(input_dir))

    train = pd.read_csv(
        input_dir / 'train.csv',
        dtype={'acoustic_data': np.int16, 'time_to_failure': np.float64})

    if verbose:
        print(train.head())

        pd.options.display.precision = 15

        print(train.head())

    # Create a training file with simple derived features

    rows = 150_000
    segments = int(np.floor(train.shape[0] / rows))

    X_train = pd.DataFrame(index=range(segments), dtype=np.float64,
                           columns=['ave', 'std', 'max', 'min'])
    y_train = pd.DataFrame(index=range(segments), dtype=np.float64,
                           columns=['time_to_failure'])

    for segment in tqdm(range(segments)):
        seg = train.iloc[segment * rows:segment * rows + rows]
        x = seg['acoustic_data'].values
        y = seg['time_to_failure'].values[-1]

        y_train.loc[segment, 'time_to_failure'] = y

        X_train.loc[segment, 'ave'] = x.mean()
        X_train.loc[segment, 'std'] = x.std()
        X_train.loc[segment, 'max'] = x.max()
        X_train.loc[segment, 'min'] = x.min()

    if verbose:
        print(X_train.head())

    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)

    svm = NuSVR()
    svm.fit(X_train_scaled, y_train.values.flatten())
    y_pred = svm.predict(X_train_scaled)

    if verbose:
        plt.figure(figsize=(6, 6))
        plt.scatter(y_train.values.flatten(), y_pred)
        plt.xlim(0, 20)
        plt.ylim(0, 20)
        plt.xlabel('actual', fontsize=12)
        plt.ylabel('predicted', fontsize=12)
        plt.plot([(0, 0), (20, 20)], [(0, 0), (20, 20)])
        plt.show()

    score = mean_absolute_error(y_train.values.flatten(), y_pred)

    if verbose:
        print(f'Score: {score:0.3f}')

    submission = pd.read_csv(
        input_dir / 'sample_submission.csv', index_col='seg_id')

    X_test = pd.DataFrame(columns=X_train.columns,
                          dtype=np.float64, index=submission.index)

    for seg_id in X_test.index:
        seg = pd.read_csv(input_dir / ('test/' + seg_id + '.csv'))

        x = seg['acoustic_data'].values

        X_test.loc[seg_id, 'ave'] = x.mean()
        X_test.loc[seg_id, 'std'] = x.std()
        X_test.loc[seg_id, 'max'] = x.max()
        X_test.loc[seg_id, 'min'] = x.min()

    X_test_scaled = scaler.transform(X_test)
    submission['time_to_failure'] = svm.predict(X_test_scaled)
    submission.to_csv('submission.csv')
Example #18
def nu_svr(dataframe,
           kernel='linear',
           target=None,
           drop_features=[],
           without_outliers=False,
           split=0.2):

    # Remove non-numerical and undesired features from dataframe
    dataframe = dataframe.loc[:, dataframe.dtypes != 'object']
    dataframe = dataframe.drop(drop_features, axis=1)

    # Transform data into columns and define target variable
    numerical_features = dataframe.loc[:, dataframe.columns != target]
    X = np.nan_to_num(
        numerical_features.to_numpy())  # .reshape(numerical_features.shape)
    y = np.nan_to_num(dataframe[target].to_numpy()
                      )  # .reshape(dataframe[target].shape[0], 1)

    # Split the data into training/testing sets
    testsplit = round(split * X.shape[0])
    X_train = X[:-testsplit]
    X_test = X[-testsplit:]
    y_train = y[:-testsplit]
    y_test = y[-testsplit:]

    # Train linear regression model
    reg = NuSVR(kernel=kernel, C=1.0, nu=0.1)
    reg.fit(X_train, y_train)
    if kernel == 'linear':
        feature_importance = pd.Series(
            reg.coef_[0],
            index=numerical_features.columns)  # only with linear kernel
    else:
        feature_importance = pd.Series()

    # Prediction with trained model
    y_pred = reg.predict(X_test)

    results = pd.Series()
    results['Train mean'] = np.mean(y_train)
    results['Train std'] = np.std(y_train)
    results['Test mean'] = np.mean(y_test)
    results['Test std'] = np.std(y_test)
    results['Prediction mean'] = np.mean(y_pred)
    results['Prediction std'] = np.std(y_pred)
    results['Mean Squared Error'] = mean_squared_error(y_test, y_pred)
    results['Mean Absolute Error'] = mean_absolute_error(y_test, y_pred)
    results['R2 score'] = r2_score(y_test, y_pred)
    results['Explained variance score'] = explained_variance_score(
        y_test, y_pred)
    results['Cross-val R2 score (mean)'] = np.mean(
        cross_val_score(reg, X, y, cv=10, scoring="r2"))
    results['Cross-val R2 scores'] = cross_val_score(reg,
                                                     X,
                                                     y,
                                                     cv=10,
                                                     scoring="r2")
    results['Cross-val explained_variance score (mean)'] = np.mean(
        cross_val_score(reg, X, y, cv=10, scoring="explained_variance"))
    results['Cross-val explained_variance scores'] = cross_val_score(
        reg, X, y, cv=10, scoring="explained_variance")

    y_result = pd.DataFrame({'y_test': y_test, 'y_pred': y_pred})
    return feature_importance, results, y_result, reg
 grid_search_results3.append([{
     'nu': nu,
     'C': C
 },
                              np.zeros(len(X_train_scaled)),
                              np.zeros(len(X_test_scaled))])
 scores3_fold = []
 print('Training model with')
 print(grid_search_results3[-1][0])
 for fold_n, (train_index,
              valid_index) in enumerate(folds.split(X_train_scaled)):
     X_train, X_valid, X_test, Y_train, Y_valid = get_train_valid_test_samples(
         X_train_scaled, Y_tr, X_test_scaled, train_index, valid_index)
     Y_train = Y_train.squeeze()
     Y_valid = Y_valid.squeeze()
     model = NuSVR(gamma='scale', nu=nu, C=C, tol=0.01)
     model.fit(X_train, Y_train)
     Y_pred_valid = model.predict(X_valid).reshape(-1, )
     scores3_fold.append(mean_absolute_error(Y_valid, Y_pred_valid))
     print('Fold {0}. MAE: {1}.'.format(fold_n + 1, scores3_fold[-1]))
     grid_search_results3[-1][1][valid_index] = Y_pred_valid
     y_pred = model.predict(X_test).reshape(-1, )
     grid_search_results3[-1][2] += y_pred
 scores3_total = np.mean(scores3_fold)
 grid_search_results3[-1][2] /= n_fold
 grid_search_results3[-1].append(scores3_total)
 grid_search_results3[-1].append('NuSVR')
 grid_search_results3[-1].append([])
 if scores3_total < min_score3:
     min_score3 = scores3_total
     best_params3 = grid_search_results3[-1][0]
     oof[-1] = grid_search_results3[-1][1]
     prediction[-1] = grid_search_results3[-1][2]
Example #20
featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(
    correctedSeries, depth, horizon)

outputFolderName = "Outputs/Outputs" + datetime.now().strftime(
    "%Y_%m_%d_%H_%M_%S")
os.mkdir(outputFolderName)
for i in range(horizon):
    # Train different models for different horizon
    # Train the model
    #model = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))])
    #model = NuSVR(kernel='linear', nu=1.0)
    model = NuSVR(kernel="rbf", nu=1.0, tol=1e-10, gamma=1.0)
    #model = RidgeCV()
    model.fit(featureVectors, targetVectors[:, i])

    predictedTargetVectors = model.predict(featureVectors)

    # Plot the actual and predicted
    actual = targetVectors[:, i]
    predicted = predictedTargetVectors

    # Descale
    actual = util.scalingFunction.inverse_transform(actual)
    predicted = util.scalingFunction.inverse_transform(predicted)

    outplot = outputPlot.OutputPlot(
        outputFolderName + "/Prediction_horizon" + str(i + 1) + ".html",
        "Facebook Fans Change - Linear Regression", "Taylor Swift", "Time",
        "Output")
    outplot.setXSeries(np.arange(1, targetVectors.shape[0]))
    outplot.setYSeries('Actual Output', actual)
Example #21
    train_data_fold = (train_data_fold - data_mean) / data_std
    train_label_fold = (train_label_fold - label_mean) / label_std
    test_data_fold = (test_data_fold - data_mean) / data_std

    validate_data_fold = train_data_fold[validate_idx]
    validate_label_fold = train_label_fold[validate_idx]
    train_data_fold = train_data_fold[train_idx]
    train_label_fold = train_label_fold[train_idx]

    # train
    model = NuSVR(**params)
    model.fit(
        train_data_fold,
        train_label_fold,
    )
    train_pred_fold = model.predict(train_data_fold)
    train_error = mean_absolute_error(train_label_fold,
                                      train_pred_fold) * label_std

    # pred on train
    validate_pred_fold = model.predict(validate_data_fold)
    validate_error = mean_absolute_error(validate_label_fold,
                                         validate_pred_fold) * label_std
    validate_pred_fold = validate_pred_fold * label_std + label_mean
    validate_pred_fold = np.clip(validate_pred_fold, 0, 50)
    validate_pred_fold = pd.DataFrame(validate_pred_fold)
    validate_pred_fold['idx'] = validate_idx
    pred_on_train.append(validate_pred_fold)

    record.append((train_error, validate_error))
    print('Train Error:{}\nValidate Error:{}'.format(train_error,
                                                     validate_error))

# Form feature and target vectors
featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(correctedSeries, depth, horizon)


outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
os.mkdir(outputFolderName)
for i in range(horizon):
    # Train different models for different horizon
    # Train the model
    #model = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))])
    #model = NuSVR(kernel='linear', nu=1.0)
    model = NuSVR(kernel="rbf", nu=1.0, tol=1e-10, gamma=1.0)
    #model = RidgeCV()
    model.fit(featureVectors, targetVectors[:, i])

    predictedTargetVectors = model.predict(featureVectors)

    # Plot the actual and predicted
    actual = targetVectors[:, i]
    predicted = predictedTargetVectors

    # Descale
    actual = util.scalingFunction.inverse_transform(actual)
    predicted = util.scalingFunction.inverse_transform(predicted)

    outplot = outputPlot.OutputPlot(outputFolderName + "/Prediction_horizon"+str(i+1)+".html", "Facebook Fans Change - Linear Regression", "Taylor Swift", "Time", "Output")
    outplot.setXSeries(np.arange(1, targetVectors.shape[0]))
    outplot.setYSeries('Actual Output', actual)
    outplot.setYSeries('Predicted Output', predicted)
    outplot.createOutput()
Example #23
trainrms = sqrt(mean_squared_error(y_test, y_pred))
print("RFPCA : trainrms {}".format(trainrms))

plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_pred)
plt.xlabel('ytest', fontsize=12)
plt.ylabel('RF', fontsize=12)
plt.show()

from sklearn.svm import NuSVR
NuSVRreg = NuSVR(C=65.0, nu=.99)
params = {'C': [45, 55, 65], 'nu': [1 / i for i in range(1, 10)]}
NuSVRreg = GridSearchCV(NuSVRreg, params)
NuSVRreg.fit(X_data, Y_data)
# Make the prediction on the meshed x-axis (ask for MSE as well)
y_NuSVRreg = NuSVRreg.predict(X_test)
trainrms = sqrt(mean_squared_error(y_test, y_NuSVRreg))
print("NuSVRreg : trainrms {}".format(trainrms))
plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_NuSVRreg)
plt.xlabel('ytest', fontsize=12)
plt.ylabel('NuSVR', fontsize=12)
plt.show()

#=============================================================================
# end RF
#=============================================================================
#=============================================================================
# start XGS
#=============================================================================
Example #24
def train_model(X,
                y,
                X_test,
                model_type=None,
                params=None,
                folds=folds,
                feat_importance=False):
    preds_oof_all = np.zeros(len(X))  # out-of-fold predictions
    preds_test_all = np.zeros(len(X_test))  # test set predictions
    errors_oof_all = []  # mean absolute error for out-of-fold predictions
    feat_imp_all = pd.DataFrame()

    # ---------- Iterate over folds ----------
    for fold_i, (train_i, oof_i) in enumerate(folds.split(X)):
        x_train, x_oof = X.iloc[train_i], X.iloc[oof_i]
        y_train, y_oof = y.iloc[train_i], y.iloc[oof_i]

        # ---------- Fit model and predict in current fold ----------
        if model_type == "lgb":
            model = lgb.LGBMRegressor(**params, n_estimators=50_000, n_jobs=-1)
            model.fit(x_train,
                      y_train,
                      eval_set=[(x_train, y_train), (x_oof, y_oof)],
                      eval_metric="mae",
                      verbose=10_000,
                      early_stopping_rounds=200)
            preds_oof = model.predict(x_oof)
            preds_test = model.predict(X_test,
                                       num_iteration=model.best_iteration_)

        if model_type == "xgb":
            xgb_train = xgb.DMatrix(x_train, y_train, feature_names=X.columns)
            xgb_oof = xgb.DMatrix(x_oof, y_oof, feature_names=X.columns)
            xgb_oof_nolabel = xgb.DMatrix(x_oof, feature_names=X.columns)
            xgb_test = xgb.DMatrix(X_test, feature_names=X.columns)
            model = xgb.train(dtrain=xgb_train,
                              num_boost_round=20_000,
                              evals=[(xgb_train, "train"),
                                     (xgb_oof, "valid_data")],
                              early_stopping_rounds=200,
                              verbose_eval=500,
                              params=params)
            preds_oof = model.predict(xgb_oof_nolabel,
                                      ntree_limit=model.best_ntree_limit)
            preds_test = model.predict(xgb_test,
                                       ntree_limit=model.best_ntree_limit)

        if model_type == "nusvr":
            model = NuSVR(**params)
            model.fit(x_train, y_train)
            preds_oof = model.predict(x_oof)
            preds_test = model.predict(X_test)

        if model_type == "krr":
            model = KernelRidge(**params)
            model.fit(x_train, y_train)
            preds_oof = model.predict(x_oof).reshape(
                -1, )  # reshape from (n, 1) to (n, )
            preds_test = model.predict(X_test).reshape(-1, )

        # ---------- Save errors and predictions from fold ----------
        preds_oof_all[
            oof_i] = preds_oof  # set out-of-fold preds to right index
        preds_test_all += preds_test  # sum the predictions (to be averaged later over folds)
        error_oof = mean_absolute_error(y_oof, preds_oof)
        errors_oof_all.append(error_oof)  # append errors from current fold

        if (model_type == "nusvr" or model_type == "krr"):
            print(f"Fold {fold_i + 1}. MAE: {error_oof:.4f}."
                  )  # fold evaluation for sklearn models

        # ---------- Feature importance in fold for LGB ----------
        if (model_type == "lgb" and feat_importance == True):
            feat_imp_fold = pd.DataFrame()
            feat_imp_fold["feature"] = X.columns
            feat_imp_fold["importance"] = model.feature_importances_
            feat_imp_fold["fold"] = fold_i + 1
            feat_imp_all = pd.concat([feat_imp_all, feat_imp_fold], axis=0)

    # ---------- Aggregate errors and predictions over all folds ----------
    preds_test_all /= num_folds  # average predictions
    mean_error = np.mean(errors_oof_all)
    std_error = np.std(errors_oof_all)
    print(f"CV error mean: {mean_error:.4f}, std: {std_error:.4f}")

    # ---------- Feature importance over all folds ----------
    if (model_type == "lgb" and feat_importance == True):
        feat_imp_all["importance"] /= num_folds  # average importances
        top_30_feats = feat_imp_all[[
            "feature", "importance"
        ]].groupby("feature").mean().sort_values("importance",
                                                 ascending=False)[0:30].index
        imp_values_top_30 = feat_imp_all.loc[feat_imp_all["feature"].isin(
            top_30_feats)]
        imp_values_top_30 = imp_values_top_30.sort_values(
            "importance", ascending=False
        )  # importance values from each of the 5 folds for the top-30 features ie 150 values
        plt.figure(figsize=(13, 7))
        sns.barplot("importance", "feature", data=imp_values_top_30)
        plt.title("LGB best features (avg over folds)")

    return preds_oof_all, preds_test_all
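A hedged usage sketch for train_model above with the NuSVR branch (the parameter values are assumptions, and folds/num_folds must already be defined as in the surrounding snippet):

params_nusvr = {'gamma': 'scale', 'nu': 0.7, 'C': 1.0, 'tol': 0.01}
preds_oof, preds_test = train_model(X, y, X_test,
                                    model_type="nusvr",
                                    params=params_nusvr)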
Example #25
    os.chdir(folder)
    name_folder = folder.split("/")[6]
    train_data = np.array(pd.read_csv('train_data.csv', sep=';'))
    test_data = np.array(pd.read_csv('test_data.csv', sep=';'))
    train_labels = np.array(pd.read_csv('train_labels.csv', sep=';'))
    test_labels = np.array(pd.read_csv('test_labels.csv', sep=';'))

    inicio = time.time()

    # import the regression model
    from sklearn.svm import NuSVR

    regression = NuSVR().fit(train_data, train_labels)

    # predict
    predictions_labels = regression.predict(test_data)

    fim = time.time()
    df_time = pd.DataFrame({'Execution Time:': [fim - inicio]})

    output_path = os.path.join(
        '/home/isadorasalles/Documents/Regressao/Nu_svr',
        'time_' + name_folder)
    df_time.to_csv(output_path, sep=';')

    from sklearn import metrics

    df_metrics = pd.DataFrame({
        'Mean Absolute Error':
        [metrics.mean_absolute_error(test_labels, predictions_labels)],
        'Mean Squared Error':
        [metrics.mean_squared_error(test_labels, predictions_labels)]})
Example #26
trainingSeries, testingSeries = util.splitIntoTrainingAndTestingSeries(correctedSeries, horizon)

# Learning Process - Start

# Form the feature and target vectors
featureVectors, targetVectors = formFeatureAndTargetVectors(trainingSeries)

# Fit a model
model = NuSVR(kernel="rbf", gamma=1.0, nu=1.0, tol=1e-15)
model.fit(featureVectors, targetVectors[:, 0])

# Learning Process - End

# Predict for testing data points
testingFeatureVectors, testingTargetVectors = formFeatureAndTargetVectors(testingSeries)
predictedTrainingOutputData = model.predict(testingFeatureVectors)

# Predicted and actual Series
actualSeries = testingSeries
predictedSeries = pd.Series(data=predictedTrainingOutputData.flatten(), index=testingSeries.index)

# Learning Process - End

# Step 5 - Descale the series
actualSeries = util.descaleSeries(actualSeries)
predictedSeries = util.descaleSeries(predictedSeries)


outputFolderName = "Outputs/"+str(profileName)+"Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
util.plotSeries(outputFolderName, [actualSeries, predictedSeries], ["Actual Series", "Predicted Series"], "Facebook Fans Change", "Outlier Detection")
# Learning Process - Start

# Parameters
depth = 100

# Form feature and target vectors
featureVectors, targetVectors = util.formContinousFeatureAndTargetVectorsWithoutBias(correctedSeries, depth)
featureVectors, targetVectors = util.formFeatureAndTargetVectors(correctedSeries, depth)


# Train using a linear-kernel SVR
#model = SVR(kernel="linear")
model = NuSVR(nu=1.0, kernel="linear")
model.fit(featureVectors, targetVectors[:, 0])
predictedTrainingOutputData = model.predict(featureVectors)

targetVectors = targetVectors

# Predicted and actual Series
actualSeries = pd.Series(data=targetVectors.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:])
predictedSeries = pd.Series(data=predictedTrainingOutputData.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:])

# Learning Process - End

# Step 5 - Descale the series
actualSeries = util.descaleSeries(actualSeries)
predictedSeries = util.descaleSeries(predictedSeries)


outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
Example #28
def train_svr_cpu(X, Y, X_eval, c, kernel='linear', nu=0.5):
    svc = NuSVR(kernel=kernel, C=c, max_iter=100000, nu=nu, gamma='auto')
    svc.fit(X, Y)
    y_prob = svc.predict(X_eval)
    return y_prob
Example #29
X_train_scaled = scaler.transform(X_train)
print(X_train_scaled)

# In[6]:
#apply model

#from sklearn.isotonic import IsotonicRegression
#from sklearn.linear_model import ElasticNet
#from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn import svm
from sklearn.svm import NuSVR

model = NuSVR()

model.fit(X_train_scaled, y_train.values.flatten())
y_pred = model.predict(X_train_scaled)

# In[7]:
#plt.figure(figsize=(6, 6))
#plt.scatter(y_train.values, y_pred)
#plt.xlim(0, 20)
#plt.ylim(0, 20)
#plt.xlabel('actual', fontsize=12)
#plt.ylabel('predicted', fontsize=12)
#plt.plot([(0, 0), (20, 20)], [(0, 0), (20, 20)])
#plt.show()

plt.figure(figsize=(16, 8))
plt.plot(y_train, color='b', label='y_train')
plt.plot(y_pred, color='gold', label='naive_model')
plt.legend()
Example #30
def _gerarPlotFit(list_index_real, list_y_real, list_index_previsto,
                  list_y_previsto, list_index_real_original,
                  x_predict_original, list_index_previsto_original,
                  list_y_previsto_original, list_y_real_original, isFit,
                  df_norm):
    global cach_fit

    # Plot the fit
    if (isFit):
        x_fit_real = [x + 1 for x in np.arange(len(list_index_real))]
        y_fit_real = list_y_real

        x_fit_previsto = np.asarray(
            [x + 1 for x in np.arange(len(list_index_previsto))],
            dtype=np.int32)
        y_fit_previsto = list_y_previsto

        x_fit_previsto_original = np.asarray(
            [x + 1 for x in np.arange(len(list_index_previsto_original))],
            dtype=np.int32)
        y_fit_previsto_original = list_y_previsto_original

        x_fit_real_original = np.asarray(
            [x + 1 for x in np.arange(len(list_index_real_original))],
            dtype=np.int32)
        y_fit_real_original = list_y_real_original

        list_x = np.arange(len(df_norm.index))
        parcela_x = (0 if len(x_fit_real) == 1 else ceil(
            len(x_fit_real) * 0.4))
        #print(parcela_x)
        coefs_linear_reais = np.polyfit(
            x_fit_real,
            y_fit_real,
            1,
        )
        coefs_linear_previsto = np.polyfit(x_fit_previsto, y_fit_previsto, 1)
        coefs_linear_previsto_parcela = np.polyfit(
            x_fit_previsto[parcela_x:len(x_fit_previsto)],
            y_fit_previsto[parcela_x:len(x_fit_previsto)], 1)
        coefs_linear_previsto_peso = np.polyfit(x_fit_previsto,
                                                y_fit_previsto,
                                                1,
                                                w=np.sqrt(
                                                    x_fit_previsto[::-1]))

        if (x_predict_original.sum() == 0 and len(cach_fit) != 0):
            ffit_reais = cach_fit[0]
            ffit_peso = cach_fit[1]
            ffit = cach_fit[2]
            fit_reta_previsto = cach_fit[3]
            fit_svr = cach_fit[4]
            fit_reta_previsto_parcela = cach_fit[5]
            fit_svr_ply = cach_fit[6]
            list_x = cach_fit[7]
        else:
            ffit_reais = np.poly1d(coefs_linear_reais)
            ffit_peso = np.poly1d(coefs_linear_previsto_peso)
            ffit = np.poly1d(coefs_linear_previsto)
            fit_reta_previsto_parcela = np.poly1d(
                coefs_linear_previsto_parcela)
            # Fit with the reduced line equation [y = ax + b]
            fit_reta_previsto = [
                ((y_fit_real_original[-1] - y_fit_real_original[0]) /
                 (x_fit_real_original[-1] - x_fit_real_original[0])) *
                (x - x_fit_real_original[0]) + x_fit_real_original[0]
                for x in list_x
            ]

            svr_nu = NuSVR(kernel='linear', C=1, gamma='scale', nu=0.9)
            svr_nu_poly = NuSVR(kernel='rbf', C=1, gamma='scale', nu=0.9)
            svr_nu.fit((x_fit_previsto_original.reshape(-1, 1)),
                       y_fit_previsto_original)
            svr_nu_poly.fit((x_fit_previsto_original.reshape(-1, 1)),
                            y_fit_previsto_original)
            fit_svr = svr_nu.predict(list_x.reshape(-1, 1))
            fit_svr_ply = svr_nu_poly.predict(list_x.reshape(-1, 1))
            cach_fit = (ffit_reais, ffit_peso, ffit, fit_reta_previsto,
                        fit_svr, fit_reta_previsto_parcela, fit_svr_ply,
                        list_x)

    # legend_fit_real,= plt.plot(df_norm.index, ffit_reais(list_x), color="orange",  linestyle='--', label="FIT [pontos reais]")
    # legend_fit_previsto, = plt.plot(df_norm.index, ffit_peso(list_x), color="red",  linestyle='--', label= "FIT [pontos reais + último ponto previsto] PESO (SQRT)")
    # legend_fit_previsto_sem_peso, = plt.plot(df_norm.index, ffit(list_x), color="g",  linestyle='--', label= "FIT [pontos reais + último ponto previsto] Sem peso")
    # legend_fit_previsto_reta, = plt.plot(df_norm.index,fit_reta_previsto, color="chocolate",  linestyle='--', label= "FIT Equacao da Reta")
    # legend_fit_previsto_sem_peso_parcela, = plt.plot(df_norm.index, fit_reta_previsto_parcela(list_x), color="slategray",  linestyle='--', label= "FIT [pontos reais + último ponto previsto - parcela] Sem peso")

        legend_fit_previsto_svr, = plt.plot(df_norm.index,
                                            fit_svr,
                                            color="mediumvioletred",
                                            linestyle='--',
                                            label="FIT SVR [Linear]")
        #legend_fit_previsto_svr_poly, = plt.plot(df_norm.index,fit_svr_ply, color="red",  linestyle='--', label= "FIT SVR [Poly]")

        # list_legend_fit = [legend_fit_previsto, legend_fit_previsto_sem_peso, legend_fit_real, legend_fit_previsto_reta,legend_fit_previsto_svr,legend_fit_previsto_sem_peso_parcela]
        list_legend_fit = [legend_fit_previsto_svr]
        return list_legend_fit
Example #31
class NuSvrClass:
    """
    Name      : NuSVR
    Attribute : None
    Method    : predict, predict_by_cv, save_model
    """
    def __init__(self):
        # Algorithm name
        self._name = 'nusvr'

        # Base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # Split into training and test rows
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Training data
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test data
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        self._model = NuSVR(nu=0.5, cache_size=100)

        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])

        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])

            xa = []

            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Plain prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)

        # Score
        score = r2_score(self._y_test, y_pred)

        # Report
        if hasattr(self._model, 'coef_') and hasattr(self._model,
                                                     'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')

        print(f'Score = {score}')

        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # Predictions & score
        return [list(y_pred), score]

    # Cross-validation prediction
    def predict_by_cv(self):
        # For regression, implement cross validation to suit the actual project
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or refresh the model
    def save_model(self, renew=False):
        # Save the model
        if not renew:
            # First save
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Replace the existing model
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path +
                    f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model,
                        self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)

        # Actual values
        plt.plot(self._y_test, c='r')

        # Predicted values
        plt.plot(data, c='b')

        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')

        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
Example #32
def nusvrtrain(x, y, pre_x):
    x, pre_x = datscater(x, pre_x)
    clf = NuSVR(C=5.0).fit(x, y)
    pred = clf.predict(pre_x)
    return pred
Example #33
Kt = K[trainIdx][:, trainIdx]

#n = len(trainIdx)
#nv = len(valIdx)
#nx = len(testIdx)

#Train Support Vector Regression
# C = 10.^(-2:1:2);
C = [0.1]
for c in C:
    print("C = %f" % c)
    tic = time.time()
    svr = NuSVR(C=c, kernel='precomputed')
    svr.fit(Kt, trainLabels)
    toc = time.time()
    print("train cost %f s" % (toc - tic))
    trainScores = svr.predict(Kt)
    mseTrain = np.mean((trainLabels - trainScores)**2)
    valScores = svr.predict(Kv)
    mseVal = np.mean((valLabels - valScores)**2)
    testScores = svr.predict(Kx)
    mseTest = np.mean((testLabels - testScores)**2)
    print('Train MSE : %g' % mseTrain)
    print('val MSE : %g' % mseVal)
    print('Test MSE : %g' % mseTest)

    # use all samples to train
    svr = NuSVR(C=c, kernel='precomputed')
    svr.fit(K, labels)
    joblib.dump(svr, 'svr.pkl', compress=3)
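The snippet above persists the trained SVR with joblib; restoring it later is symmetric (a sketch, assuming the same file path):

import joblib

svr = joblib.load('svr.pkl')    # restores the trained NuSVR
# scores = svr.predict(K_new)   # K_new must be a precomputed Gram matrix vs. the training samples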
# Learning Process - Start

# Parameters
depth = 100

# Form feature and target vectors
featureVectors, targetVectors = util.formContinousFeatureAndTargetVectorsWithoutBias(
    correctedSeries, depth)
featureVectors, targetVectors = util.formFeatureAndTargetVectors(
    correctedSeries, depth)

# Train using a linear-kernel SVR
#model = SVR(kernel="linear")
model = NuSVR(nu=1.0, kernel="linear")
model.fit(featureVectors, targetVectors[:, 0])
predictedTrainingOutputData = model.predict(featureVectors)

targetVectors = targetVectors

# Predicted and actual Series
actualSeries = pd.Series(data=targetVectors.flatten(),
                         index=correctedSeries.index[-targetVectors.shape[0]:])
predictedSeries = pd.Series(
    data=predictedTrainingOutputData.flatten(),
    index=correctedSeries.index[-targetVectors.shape[0]:])

# Learning Process - End

# Step 5 - Descale the series
actualSeries = util.descaleSeries(actualSeries)
predictedSeries = util.descaleSeries(predictedSeries)
Example #35
y_train = data[col_heading[-2:]].values
X = data.drop([
    'GT_Compressor_decay_state_coefficient',
    'GT_Turbine_decay_state_coefficient'
],
              axis=1)
y1 = pd.DataFrame(data=y_train[:, 0], columns=[final_cols[-2]])
y2 = pd.DataFrame(data=y_train[:, 1], columns=[final_cols[-1]])
scaler = StandardScaler()
X_train = scaler.fit_transform(X)  # scale the feature matrix X; X_train was not defined before this line
scaled_X = pd.DataFrame(data=X_train, columns=final_cols[:-2])
reg1 = NuSVR()
reg1.fit(X_train, y_train[:, 0])
reg2 = NuSVR()
reg2.fit(X_train, y_train[:, 1])
y_pred1 = reg1.predict(X=X_train)
y_pred2 = reg2.predict(X=X_train)
y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
printMetrics(y_true=y_train, y_pred=y_pred)
metrics = getMetrics(y_true=y_train, y_pred=y_pred)
fig1, ax1 = plt.subplots(figsize=(15, 15))
myplot1 = plot_partial_dependence(reg1,
                                  scaled_X,
                                  final_cols[:-2],
                                  ax=ax1,
                                  n_jobs=-1)
myplot1.plot()
fig1.savefig('GT_Compressor_decay_state_coefficient.png')
fig2, ax2 = plt.subplots(figsize=(15, 15))
myplot2 = plot_partial_dependence(reg2, scaled_X, final_cols[:-2],
                                  ax=ax2, n_jobs=-1)
myplot2.plot()
fig2.savefig('GT_Turbine_decay_state_coefficient.png')
Example #36
#         'gamma':'auto'
#     }
#     val=cross_val_score(NuSVR(**params),X_train_scaled,y_tr,scoring='neg_mean_absolute_error',cv=5).mean()
#     return val

# nusvr_bo=BayesianOptimization(nusvr_cv_score,params_nusvr)
# nusvr_bo.maximize()
# max_param_nusvr=nusvr_bo.max['params']
# max_param_nusvr['gamma']='auto'
# print(max_param_nusvr)
# opt_nusvr_reg=NuSVR(**max_param_nusvr)

# NuSVR(gamma='scale', nu=0.7, tol=0.01, C=1.0)
opt_nusvr_reg = NuSVR(gamma='scale', nu=0.9, C=10.0, tol=0.01)
opt_nusvr_reg.fit(X_train_scaled, y_tr)
y_pred = opt_nusvr_reg.predict(X_test_scaled).reshape(-1, )

# params_nusvr_grid={
#     'nu':[0.1,0.3,0.5,0.7,1],
#     'C':[1],
#     'tol':[0.01,0.03,0.05,0.07,0.1]
# }
# nusvr_reg=NuSVR()
# grid_search_nusvr=GridSearchCV(nusvr_reg,params_nusvr_grid,cv=4,scoring='neg_mean_absolute_error')
# grid_search_nusvr.fit(X_train_scaled,y_tr)
# print(grid_search_nusvr.cv_results_)
# y_pred=grid_search_nusvr.predict(X_test_scaled).reshape(-1,)

submission['time_to_failure'] = y_pred
# submission['time_to_failure'] = prediction_lgb_stack
submission.to_csv('nusvr.csv', index=False)
# Run trained SVR on full record:
#
wdw_beg = 1
wdw_end = 15000
regr_idx = 0
fetal_lead_wdw = np.zeros([(wdw_end - wdw_beg),])
mat_lead_wdw = np.zeros([(wdw_end - wdw_beg),])
cwt_wdw = np.zeros([(wdw_end - wdw_beg), n_feats])
for wdw_idx in np.arange(wdw_beg, wdw_end):
    fetal_lead_wdw[regr_idx] = fetal_lead[wdw_idx]
    mat_lead_wdw[regr_idx] = mat_lead[wdw_idx]
    blef = cwt_trans[wdw_idx - cwt_wdw_lth_h : wdw_idx + cwt_wdw_lth_h -1, :]
    cwt_wdw[regr_idx,:] = blef.flatten()
    regr_idx = regr_idx +1

z_rbf = nusv_res.predict(cwt_wdw)
figz = make_subplots(rows=2, cols=1)
figz.append_trace(go.Scatter(x = x_idxs, y = mat_lead_wdw), row=1, col=1)
figz.append_trace(go.Scatter(x = x_idxs, y = fetal_lead_wdw), row=2, col=1)
figz.append_trace(go.Scatter(x = x_idxs, y = z_rbf), row=2, col=1)
figz.show()


# plt.plot(fetal_lead[500:700])
# plt.plot(svr_rbf.predict(cwt_trans[500:700,:]))

arf = 12



# Form feature and target vectors
featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(trainingSeries, depth, horizon)

predictedSeries = []
outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
os.mkdir(outputFolderName)
for i in range(horizon):
    # Train different models for different horizon
    # Train the model
    model = NuSVR(kernel="rbf", gamma=1.0, nu=1.0, C=4, tol=1e-10)
    model.fit(featureVectors, targetVectors[:, i])

    # Now, predict the future
    featureVector = availableSeries.values[-depth:].reshape(1,depth)
    predicted = model.predict(featureVector)
    predictedSeries.append(predicted)

predictedSeries = pd.Series(data=np.array(predictedSeries).flatten(), index=testingSeries.index)

# Descale the series
predictedSeries = util.descaleSeries(predictedSeries)
actualSeries = util.descaleSeries(testingSeries)

# Plot the results
details = profileName + "_horizon_" + str(horizon) + "_depth_" + str(depth)
util.plotSeries("Outputs/Outputs_" + str(datetime.now()) + details,
                [actualSeries, predictedSeries], ["Actual Output", "Predicted Output"], "Facebook Fans Change - "+profileName, "Taylor Swift")