Example #1
class MIKernelSVR(MIKernelSVM):

    def __init__(self, **parameters):
        svr_params = {
            'kernel' : 'precomputed',
            'max_iter': MAX_ITERS,
        }
        if 'C' in parameters:
            svr_params['C'] = parameters.pop('C')
        if 'nu' in parameters:
            svr_params['nu'] = parameters.pop('nu')
        self.estimator = NuSVR(**svr_params)

        # Get kernel name and pass remaining parameters to kernel
        mi_kernel_name = parameters.pop('kernel')
        self.mi_kernel = kernel.by_name(mi_kernel_name, **parameters)

    def fit(self, X, y):
        X = list(map(np.asarray, X))
        self.fit_data = X
        self.gram_matrix = self.mi_kernel(X, X)
        self.estimator.fit(self.gram_matrix, y)
        return self

    def predict(self, X=None):
        if X is None:
            gram_matrix = self.gram_matrix
        else:
            X = list(map(np.asarray, X))
            gram_matrix = self.mi_kernel(X, self.fit_data)
        return self.estimator.predict(gram_matrix)
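
A minimal usage sketch for the class above; the kernel name and the toy bags are illustrative assumptions (the available multi-instance kernel names depend on the project's kernel module):

import numpy as np
bags = [np.random.rand(5, 3), np.random.rand(8, 3)]   # two bags of 3-D instances
y = [0.2, 0.7]                                        # one regression target per bag
model = MIKernelSVR(kernel='mean', C=10.0, nu=0.5)    # 'mean' is a hypothetical kernel name
model.fit(bags, y)
preds = model.predict(bags)   # predict() with no argument reuses the training Gram matrix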
Example #2
    def train(self, x, y, param_names, random_search=100,
              kernel_cache_size=2000, **kwargs):
        if self._debug:
            print("Before preprocessing: 1st sample:\n", x[0])
        start = time.time()
        scaled_x = self._set_and_preprocess(x=x, param_names=param_names)

        # Check that each input is between 0 and 1
        self._check_scaling(scaled_x=scaled_x)

        if self._debug:
            print("Shape of training data: ", scaled_x.shape)
            print("Param names: ", self._used_param_names)
            print("First training sample\n", scaled_x[0])
            print("Encode: ", self._encode)

        # Do a random search over (nu, C, gamma)
        nu, c, gamma = self._random_search(random_iter=random_search, x=scaled_x,
                                           y=y, kernel_cache_size=kernel_cache_size)

        # Now train the model
        try:
            nusvr = NuSVR(gamma=gamma, C=c, nu=nu, random_state=self._rng,
                          cache_size=kernel_cache_size)
            nusvr.fit(scaled_x, y)
            self._model = nusvr
        except Exception as e:
            print("Training failed:", e)
            self._model = None
Example #3
    def fit(self, X, Y, W):
        clf = NuSVR(nu=self.nu, C=self.C, kernel=self.kernel, degree=self.degree,
                    gamma=self.gamma, coef0=self.coef0, shrinking=self.shrinking,
                    tol=self.tol, cache_size=self.cache_size,
                    max_iter=self.max_iter)
        if W is not None:
            # per-sample weights are passed positionally as fit's third argument
            return NuSVRClassifier(clf.fit(X, Y.reshape(-1), W.reshape(-1)))
        return NuSVRClassifier(clf.fit(X, Y.reshape(-1)))
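
For reference, scikit-learn's NuSVR.fit accepts per-sample weights as a third argument, which is what the W branch above relies on; a minimal standalone sketch:

import numpy as np
from sklearn.svm import NuSVR

X = np.random.rand(20, 4)
y = np.random.rand(20)
w = np.ones(20)
w[:5] = 5.0  # upweight the first five samples
reg = NuSVR(nu=0.5, C=1.0).fit(X, y, sample_weight=w)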
Example #4
def traindt(x, y):
    global clf

    # print("training surrogate")
    # clft = DecisionTreeRegressor(max_depth=tree_max_depth, splitter='random')
    # clft = RandomForestRegressor()
    # clft = GradientBoostingRegressor(loss='lad', n_estimators=50, learning_rate=0.3, max_depth=2)
    clft = NuSVR(C=1e6)
    clf = clft.fit(x, y)
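
The fitted surrogate is exposed only through the module-level clf; a hypothetical call site (x_train, y_train and x_new are illustrative names, not from the source):

traindt(x_train, y_train)
y_pred = clf.predict(x_new)  # clf holds the NuSVR fitted inside traindt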
Example #5
    def _random_search(self, random_iter, x, y, kernel_cache_size):
        # Default values
        c = 1.0
        gamma = 0.0
        nu = 0.5
        best_score = -sys.maxsize

        if random_iter > 0:
            sys.stdout.write("Doing a random search %d times " % random_iter)
            param_dist = {"C": numpy.power(2.0, range(-5, 16)),
                          "gamma": numpy.power(2.0, range(-15, 4)),
                          "nu": uniform(loc=0.0001, scale=1-0.0001)}
            param_list = [{"C": c, "gamma": gamma, "nu": nu}, ]
            param_list.extend(list(ParameterSampler(param_dist,
                                                    n_iter=random_iter-1,
                                                    random_state=self._rng)))
            for idx, d in enumerate(param_list):
                nusvr = NuSVR(kernel='rbf',
                              gamma=d['gamma'],
                              C=d['C'],
                              nu=d['nu'],
                              random_state=self._rng,
                              cache_size=kernel_cache_size)
                train_x, test_x, train_y, test_y = \
                    train_test_split(x, y, test_size=0.5, random_state=self._rng)
                self._check_scaling(scaled_x=train_x)
                nusvr.fit(train_x, train_y)
                sc = nusvr.score(test_x, test_y)
                # Tiny output
                m = "."
                if idx % 10 == 0:
                    m = "#"
                if sc > best_score:
                    m = "<"
                    best_score = sc
                    c = d['C']
                    gamma = d['gamma']
                    nu = d['nu']
                sys.stdout.write(m)
                sys.stdout.flush()
            sys.stdout.write("\nUsing C: %f, nu: %f and Gamma: %f\n" %
                             (c, nu, gamma))
        return nu, c, gamma
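
The loop above hand-rolls a random search with a single 50/50 holdout split; scikit-learn's RandomizedSearchCV covers the same ground with cross-validation. A minimal sketch, not the author's code:

import numpy
from scipy.stats import uniform
from sklearn.svm import NuSVR
from sklearn.model_selection import RandomizedSearchCV

param_dist = {"C": numpy.power(2.0, numpy.arange(-5, 16)),
              "gamma": numpy.power(2.0, numpy.arange(-15, 4)),
              "nu": uniform(loc=0.0001, scale=1 - 0.0001)}
search = RandomizedSearchCV(NuSVR(kernel='rbf'), param_dist,
                            n_iter=100, cv=5, random_state=0)
# search.fit(x, y); search.best_params_ then holds the chosen C, gamma and nu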
Example #7
def runTcheby():
    global param, approx_pareto_front, archiveOK, NO_FILE_TO_WRITE

    ############################################################################
    # PARAMETER

    # clf = SVR(C=1.0, epsilon=0.1, kernel="rbf")
    clf = NuSVR()
    clf2 = -1
    two_models_bool = False

    isReals = True
    start_fct, nb_functions = param[0:2]
    nb_iterations, neighboring_size = param[2:4]
    init_decisions, problem_size = param[4:6]
    max_decisions_maj, delta_neighbourhood = param[6:8]
    CR, search_space = param[8:10]
    F, distrib_index_n = param[10:12]
    pm, operator_fct = param[12:14]
    nb_samples, training_neighborhood_size = param[14:16]
    strategy, file_to_write = param[16:18]
    filter_strat, free_eval = param[18:20]
    param_print_every, file_to_writeR2 = param[20:22]
    filenameDIR, filenameSCORE = param[22:24]

    nb_objectives = len(start_fct)

    # unpack the offspring operator functions separately
    crossover_fct, mutation_fct, repair_fct = operator_fct

    best_decisions = copy.deepcopy(init_decisions)

    sampling_param = [
        crossover_fct,
        mutation_fct,
        repair_fct,
        best_decisions,
        F,
        problem_size,
        CR,
        search_space,
        distrib_index_n,
        pm,
    ]

    ############################################################################
    # INITIALISATION

    qual_tools.resetGlobalVariables(filenameDIR, filenameSCORE, nb_iterations, nb_functions)

    eval_to.resetEval()

    # get the direction weights for the starting functions
    directions = dec.getDirections(nb_functions, nb_objectives)

    # init the neighboring constant
    nt.initNeighboringTab(nb_functions, neighboring_size, directions, nb_objectives)

    # give best_decisions global visibility so the result can be read at the end
    approx_pareto_front = best_decisions

    # initial best decisions scores
    best_decisions_scores = [eval_to.free_eval(start_fct, best_decisions[i], problem_size) for i in range(nb_functions)]

    pop_size = nb_functions

    # current optimal scores for both axes
    z_opt_scores = gt.getMinTabOf(best_decisions_scores)

    eval_to.initZstar(z_opt_scores)

    # build the directions part of the training samples we will learn on
    model_directions = train_to.getDirectionsTrainingMatrix(directions)

    # whether the results shall be written to a file
    writeOK = False
    if file_to_write != NO_FILE_TO_WRITE:
        writeOK = True

    writeR2OK = False
    if file_to_writeR2 != NO_FILE_TO_WRITE:
        writeR2OK = True

    ############################################################################
    # MAIN ALGORITHM

    if writeOK:
        iot.printObjectives(file_to_write, eval_to.getNbEvals(), 0, best_decisions_scores, problem_size, nb_objectives)

    # ID list allowing a random traversal of the directions in the main loop
    id_directions = [i for i in range(nb_functions)]

    # iterations loop
    for itera in range(nb_iterations):
        if not free_eval:
            # Update model
            training_inputs, training_outputs, training_set_size, training_scores = train_to.getTrainingSet(
                model_directions,
                best_decisions,
                best_decisions_scores,
                eval_to.getZstar_with_decal(),
                strategy,
                nb_functions,
                training_neighborhood_size,
            )

            clf.fit(training_inputs, training_outputs)

        """
        if(writeR2OK and not free_eval):
            training_inputs_tcheby      = eval_to.getManyTcheby(training_inputs, training_scores, eval_to.getZstar_with_decal(), training_set_size)

            random_index = numpy.arange(0,training_set_size)
            numpy.random.shuffle(random_index)
            n_folds = 10
            folds_sizes = (training_set_size // n_folds) * numpy.ones(n_folds, dtype=int)
            folds_sizes[:training_set_size % n_folds] += 1

            training_inputs_array = numpy.array(training_inputs)
            training_tcheby_array = numpy.array(training_inputs_tcheby)

            R2_cv = []
            MSE_cv = []
            MAE_cv = []
            MDAE_cv = []

            clfCV = NuSVR()

            current = 0
            for fold_size in folds_sizes:
                start, stop = current, current + fold_size
                mask = numpy.ones(training_set_size, dtype=bool)
                mask[start:stop] = 0
                current = stop

                clfCV.fit(training_inputs_array[random_index[mask]], training_tcheby_array[random_index[mask]])

                test_fold_tcheby = training_tcheby_array[random_index[start:stop]]
                test_fold_predict = clfCV.predict(training_inputs_array[random_index[start:stop]])

                R2_cv  .append(r2_score             (test_fold_tcheby, test_fold_predict))
                MSE_cv .append(mean_squared_error   (test_fold_tcheby, test_fold_predict))
                MAE_cv .append(mean_absolute_error  (test_fold_tcheby, test_fold_predict))
                MDAE_cv.append(median_absolute_error(test_fold_tcheby, test_fold_predict))

            R2 = clf.score(training_inputs, training_outputs)
            MSE_cv_mean = numpy.mean(MSE_cv)
            RMSE_cv_mean = math.sqrt(MSE_cv_mean)
            MAE_cv_mean = numpy.mean(MAE_cv)
            MDAE_cv_mean = numpy.mean(MDAE_cv)
            R2_cv_mean = numpy.mean(R2_cv)

            iot.printR2(file_to_writeR2, eval_to.getNbEvals(), itera,  R2, R2_cv_mean, MSE_cv_mean , MAE_cv_mean, MDAE_cv_mean, RMSE_cv_mean, problem_size, print_every=1)

        """

        # random traversal of the directions
        random.shuffle(id_directions)

        # functions loop
        for f in id_directions:

            # get the indices of all neighbors within delta_neighbourhood of f, including f itself
            f_neighbors, current_neighbourhing_size = nt.getNeighborsOf(f, delta_neighbourhood)

            # get a list of offspring from the neighbors
            list_offspring = samp_to.extended_sampling(f, f_neighbors, sampling_param, nb_samples)

            # apply a filter on the offspring list and select the best one
            filter_param = [
                itera,
                f,
                clf,
                clf2,
                two_models_bool,
                f_neighbors,
                list_offspring,
                model_directions,
                start_fct,
                problem_size,
                eval_to.getZstar_with_decal(),
                best_decisions_scores,
                best_decisions,
                nb_objectives,
            ]
            best_candidate = filt_to.model_based_filtring(filter_strat, free_eval, filter_param)

            # evaluate the newly created solution
            mix_scores = eval_to.eval(start_fct, best_candidate, problem_size)

            # update of the z_star point
            has_changed = eval_to.min_update_Z_star(mix_scores, nb_objectives)

            # retraining of the model with the new z_star
            if has_changed and not free_eval:
                train_to.updateTrainingZstar(eval_to.getZstar_with_decal())
                training_outputs = train_to.retrainSet(
                    training_inputs, training_scores, eval_to.getZstar_with_decal(), training_set_size, nb_objectives
                )
                clf.fit(training_inputs, training_outputs)

            # boolean that becomes True once the offspring has been added to the archive
            added_to_S = False

            # count how many best decisions have been replaced by the new offspring
            cmpt_best_maj = 0

            # random traversal of the neighbors list
            random.shuffle(f_neighbors)

            # traversal of the neighbors list
            for j in f_neighbors:

                # stop once the maximum number of replacements is reached
                if cmpt_best_maj >= max_decisions_maj:
                    break

                # compute g_tcheby
                # wj = (directions[0][j],directions[1][j])
                wj = [directions[obj][j] for obj in range(0, nb_objectives)]
                g_mix = eval_to.g_tcheby(wj, mix_scores, eval_to.getZstar_with_decal())
                g_best = eval_to.g_tcheby(wj, best_decisions_scores[j], eval_to.getZstar_with_decal())

                # if the new solution's g_tcheby is closer to the z_optimal point than function j's current best
                if g_mix < g_best:
                    cmpt_best_maj += 1
                    best_decisions[j] = best_candidate
                    best_decisions_scores[j] = mix_scores

                    # if the archive is maintained and the solution has not been added yet
                    if archiveOK and not (added_to_S):
                        arch_to.archivePut(best_candidate, mix_scores)
                        added_to_S = True
        # print("Update", itera, "done.")

        # maintain the archive if enabled
        if archiveOK:
            arch_to.maintain_archive()

        # write the results to a file if requested
        if writeOK:
            iot.printObjectives(
                file_to_write,
                eval_to.getNbEvals(),
                itera + 1,
                best_decisions_scores,
                problem_size,
                nb_objectives,
                print_every=param_print_every,
            )
            continue

        # graphic update
        # yield arch_to.getArchiveScore(), best_decisions_scores, itera+1, eval_to.getNbEvals(), eval_to.getZstar_with_decal(), pop_size, isReals
    if not free_eval and writeR2OK:
        qual_tools.computeQualityEvaluation()
        qual_tools.generateDiffPredFreeFile()
    return
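
eval_to is not shown; g_tcheby presumably computes the weighted Tchebycheff scalarization used in MOEA/D-style decomposition. A minimal sketch of that assumption:

def g_tcheby(w, scores, z_star):
    # weighted Tchebycheff: max_i w_i * |f_i(x) - z*_i|
    return max(w_i * abs(s_i - z_i) for w_i, s_i, z_i in zip(w, scores, z_star))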
Example #8
print('LinearSVR precision train: {}'.format(lsvr_score_train))
lsvr_score_test = lsvr.score(smr_test.feature_matrix, smr_test.labels)
print('LinearSVR precision test: {}'.format(lsvr_score_test))
print('')

nusvc = NuSVC()
print('NuSVC config:')
print(nusvc.get_params())
nusvc.fit(smr_train.feature_matrix, smr_train.labels)
nusvc_score_train = nusvc.score(smr_train.feature_matrix, smr_train.labels)
print('NuSVC precision train: {}'.format(nusvc_score_train))
nusvc_score_test = nusvc.score(smr_test.feature_matrix, smr_test.labels)
print('NuSVC precision test: {}'.format(nusvc_score_test))
print('')

nusvr = NuSVR()
print('NuSVR config:')
print(nusvr.get_params())
nusvr.fit(smr_train.feature_matrix, smr_train.labels)
nusvr_score_train = nusvr.score(smr_train.feature_matrix, smr_train.labels)
print('NuSVR precision train: {}'.format(nusvr_score_train))
nusvr_score_test = nusvr.score(smr_test.feature_matrix, smr_test.labels)
print('NuSVR precision test: {}'.format(nusvr_score_test))
print('')


dtc = DecisionTreeClassifier()
print('DecisionTreeClassifier config:')
print(dtc.get_params())
dtc.fit(smr_train.feature_matrix, smr_train.labels)
dtc_score_train = dtc.score(smr_train.feature_matrix, smr_train.labels)
Example #9
def nusvrtrain(x, y, pre_x):
    # datscater presumably scales x and pre_x consistently (see sketch below)
    x, pre_x = datscater(x, pre_x)
    clf = NuSVR(C=5.0).fit(x, y)
    pred = clf.predict(pre_x)
    return pred
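
datscater itself is not shown; presumably it fits a scaler on the training features and applies it to both sets. A minimal sketch of that assumption:

from sklearn.preprocessing import StandardScaler

def datscater(x, pre_x):
    scaler = StandardScaler().fit(x)   # fit on the training features only
    return scaler.transform(x), scaler.transform(pre_x)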
Example #10
import numpy as np
import pickle
from build_database import flux_obj
from sklearn.svm import SVR
from sklearn.svm import NuSVR
from matplotlib import pyplot as plt

with open('database_lat.pkl', 'rb') as f:
    db = pickle.load(f)


print(db.keys())


S = NuSVR(kernel='rbf')

X = []
Y = []
for k in db.keys():
    # k = db.keys()[5]
    # print(np.array(k))
    t = np.linspace(0, db[k].RES_FINT, db[k].NUM_T)
    # X = np.atleast_2d(t).T
    # Y = np.power(10, db[k].N)
    inp = np.vstack([np.outer(np.array([k[0], k[3]]), np.ones(int(db[k].NUM_T))), t]).T
    X.extend(inp)
    Y.extend(np.power(10, db[k].N))
    # Y.extend(db[k].N)
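
The fragment stops before training; presumably it continues by fitting the NuSVR declared above, e.g.:

S.fit(np.array(X), np.array(Y))  # assumed continuation, not in the source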

# Parameters
depth = 60
horizon = 7

# Form feature and target vectors
featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(correctedSeries, depth, horizon)


outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
os.mkdir(outputFolderName)
for i in range(horizon):
    # Train a separate model for each horizon
    #model = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))])
    #model = NuSVR(kernel='linear', nu=1.0)
    model = NuSVR(kernel="rbf", nu=1.0, tol=1e-10, gamma=1.0)
    #model = RidgeCV()
    model.fit(featureVectors, targetVectors[:, i])

    predictedTargetVectors = model.predict(featureVectors)

    # Plot the actual and predicted
    actual = targetVectors[:, i]
    predicted = predictedTargetVectors

    # Descale
    actual = util.scalingFunction.inverse_transform(actual)
    predicted = util.scalingFunction.inverse_transform(predicted)

    outplot = outputPlot.OutputPlot(outputFolderName + "/Prediction_horizon"+str(i+1)+".html", "Facebook Fans Change - Linear Regression", "Taylor Swift", "Time", "Output")
    outplot.setXSeries(np.arange(1, targetVectors.shape[0]))
# Step 4 - Remove the outliers
correctedSeries = util.detectAndRemoveOutliers(rawSeries)

# Learning Process - Start

# Parameters
depth = 100

# Form feature and target vectors
# featureVectors, targetVectors = util.formContinousFeatureAndTargetVectorsWithoutBias(correctedSeries, depth)
featureVectors, targetVectors = util.formFeatureAndTargetVectors(correctedSeries, depth)


# Train using a linear-kernel NuSVR
# model = SVR(kernel="linear")
model = NuSVR(nu=1.0, kernel="linear")
model.fit(featureVectors, targetVectors[:, 0])
predictedTrainingOutputData = model.predict(featureVectors)

# Predicted and actual Series
actualSeries = pd.Series(data=targetVectors.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:])
predictedSeries = pd.Series(data=predictedTrainingOutputData.flatten(), index=correctedSeries.index[-targetVectors.shape[0]:])

# Learning Process - End

# Step 5 - Descale the series
actualSeries = util.descaleSeries(actualSeries)
predictedSeries = util.descaleSeries(predictedSeries)