def train(self, cvs, init_params=None, FS=False, inner_jobs=1):
        if init_params is None:
            init_params = []  # avoid a mutable default argument
        print('training with deap...')

        # Stack the train/val/test folds of the first CV split back together
        X = np.vstack((cvs[0][0], cvs[0][2], cvs[0][4]))

        if len(cvs[0][1].shape) == 1 and len(cvs[0][5].shape) == 1:
            y = np.hstack((cvs[0][1], cvs[0][3], cvs[0][5]))
        else:
            y = np.vstack((cvs[0][1], cvs[0][3], cvs[0][5])).ravel()
        self.D, self.N = X.shape


        if 'elasticnet' in self.model_type.lower():
            X_train = cvs[0][0]
            y_train = cvs[0][1].reshape(-1, 1)
            X_val = cvs[0][2]
            y_val = cvs[0][3].reshape(-1, 1)
            X_test = cvs[0][4]
            y_test = cvs[0][5].reshape(-1, 1)
            # Note: the validation and test folds are stacked into the training
            # data, so the "test" error reported below is measured on samples
            # the model has already seen.
            X_train = np.vstack((X_train, X_val, X_test))
            y_train = np.vstack((y_train, y_val, y_test))
            model = ElasticNetCV(cv=5, max_iter=4000)
            model.fit(X_train, y_train.ravel())

            self.best_params = model.get_params()
            ypred = model.predict(X_test).ravel()
            if self.rated is None:
                # relative error when no rated value is given
                self.accuracy = np.mean(np.abs(ypred - y_test.ravel()) / y_test.ravel())
            else:
                self.accuracy = np.mean(np.abs(ypred - y_test.ravel()))
            self.acc_test = self.accuracy
            self.model = model

            self.logger.info('Best params')
            self.logger.info(self.best_params)
            # acc_test and accuracy hold the same value here; the mae/rms labels
            # mirror the generic logging template at the end of train()
            self.logger.info('Final mae %s', str(self.acc_test))
            self.logger.info('Final rms %s', str(self.accuracy))
            self.logger.info('finish train for model %s', self.model_type)
            self.istrained = True
            self.save(self.model_dir)

            return self.to_dict()

        else:
            if 'xgb' in self.model_type.lower():
                params = {'learning_rate': np.logspace(-5, -1, num=6, base=10),
                          'max_depth': np.unique(np.linspace(1, 150, num=50).astype('int')),
                          'colsample_bytree': np.linspace(0.4, 1.0, num=60),
                          'colsample_bynode': np.linspace(0.4, 1.0, num=60),
                          'subsample': np.linspace(0.2, 1.0, num=6),
                          'gamma': np.linspace(0.001, 2, num=20),
                          'reg_alpha': np.linspace(0, 1.0, num=12)}
                model = xgb.XGBRegressor(objective="reg:squarederror", random_state=42)
                ngen = self.static_data['sklearn']['gen']
                npop = self.static_data['sklearn']['pop']
            elif 'rf' in self.model_type.lower():
                if FS:
                    params = {
                        'max_depth': [1, 2, 3, 5, 10, 16, 24, 36, 52, 76, 96, 128, 150],
                    }
                    model = RandomForestRegressor(n_estimators=100, n_jobs=inner_jobs, random_state=42, max_features=2/3)
                    ngen = 2
                    npop = 4
                else:
                    params = {
                              'max_depth': np.unique(np.linspace(1, 130, num=50).astype('int')),
                              'max_features': ['auto', 'sqrt', 'log2', None, 0.8, 0.6, 0.4],
                              'min_samples_leaf': np.unique(np.linspace(1, cvs[0][0].shape[0]-10, num=50).astype('int')),
                              'min_samples_split': np.unique(np.linspace(2, cvs[0][0].shape[0]-10, num=50).astype('int')),
                              }
                    model = RandomForestRegressor(n_estimators=500, random_state=42)
                    ngen = self.static_data['sklearn']['gen']
                    npop = self.static_data['sklearn']['pop']
            elif self.model_type.lower() == 'svm':
                params = {'C': np.logspace(-2, 3, num=100, base=10),
                          'kernel':['linear', 'poly', 'rbf', 'sigmoid'],
                          'gamma': list(np.linspace(0.001, 2, num=100)) + ['scale', 'auto']}
                model = SVR(max_iter=1000000)
                ngen = self.static_data['sklearn']['gen']
                npop = self.static_data['sklearn']['pop']
            elif self.model_type.lower() == 'nusvm':
                params = {'nu': np.linspace(0.01, 0.99, num=10),
                          'C': np.logspace(-1, 5, num=100, base=10),
                          'gamma': np.linspace(0.01, 10, num=100)}
                model = NuSVR(max_iter=1000000)
                ngen = self.static_data['sklearn']['gen']
                npop = self.static_data['sklearn']['pop']
            elif 'mlp' in self.model_type.lower():
                if not self.is_combine:
                    params = {'hidden_layer_sizes': np.linspace(4, 800, num=50).astype('int'),
                              'alpha': np.linspace(1e-5, 1e-1, num=4),
                              }
                else:
                    params = {'hidden_layer_sizes': np.linspace(4, 250, num=50).astype('int'),
                              'activation': ['identity', 'tanh', 'relu'],
                              'alpha': np.linspace(1e-5, 1e-1, num=4),
                              }

                model = MLPRegressor(max_iter=1000, early_stopping=True)
                ngen = 5
                npop = self.static_data['sklearn']['pop']

        if self.path_group is not None:
            ncpus = joblib.load(os.path.join(self.path_group, 'total_cpus.pickle'))
            gpu_status = joblib.load(os.path.join(self.path_group, 'gpu_status.pickle'))

            njobs = int(ncpus - gpu_status)
            cpu_status = njobs
            joblib.dump(cpu_status, os.path.join(self.path_group, 'cpu_status.pickle'))
        else:
            njobs = self.njobs


        cv = EvolutionaryAlgorithmSearchCV(estimator=model,
                                           params=params,
                                           scoring='neg_root_mean_squared_error',
                                           cv=3,
                                           rated=self.rated,
                                           verbose=1,
                                           population_size=npop,
                                           gene_mutation_prob=0.8,
                                           gene_crossover_prob=0.8,
                                           tournament_size=3,
                                           generations_number=ngen,
                                           refit=False,
                                           init_params=init_params,
                                           n_jobs=njobs,
                                           path_group=self.path_group)

        cv.fit(cvs)

        self.best_params = cv.best_params_

        self.accuracy, self.acc_test = self.fit_model1(model, self.best_params, cvs)

        self.model = model
        self.model.set_params(**self.best_params)
        self.model.fit(X, y.ravel())

        self.logger.info('Best params')
        self.logger.info(self.best_params)
        self.logger.info('Final mae %s', str(self.acc_test))
        self.logger.info('Final rms %s', str(self.accuracy))
        self.logger.info('finish train for model %s', self.model_type)
        self.istrained = True
        self.save(self.model_dir)

        return self.to_dict()
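
# fit_model1 (called above) is not shown in this fragment; a minimal sketch of
# what it plausibly computes, assuming each entry of cvs is a
# (X_train, y_train, X_val, y_val, X_test, y_test) tuple and that `rated`
# switches between relative and absolute error, as in the ElasticNet branch:
def fit_model1(self, model, best_params, cvs):
        model.set_params(**best_params)
        val_errs, test_errs = [], []
        for X_tr, y_tr, X_val, y_val, X_te, y_te in cvs:
            model.fit(X_tr, y_tr.ravel())
            for X_s, y_s, errs in ((X_val, y_val, val_errs), (X_te, y_te, test_errs)):
                err = np.abs(model.predict(X_s).ravel() - y_s.ravel())
                if self.rated is None:
                    err = err / y_s.ravel()
                errs.append(np.mean(err))
        return np.mean(val_errs), np.mean(test_errs)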
Example #2
            # "Passive Aggressive Regressor ": PassiveAggressiveRegressor(max_iter=100000, tol=0.5), 
            # "random forest regressor": RandomForestRegressor(n_estimators=10), 
            # "gradient boosting regressor": GradientBoostingRegressor(min_samples_leaf=3),
            # "k nearest neighbiours regressor": KNeighborsRegressor(),
            # "RANSAC regressor": RANSACRegressor(),
            "SGD regressor": SGDRegressor(max_iter=100000, tol=0.5),
            # "kernel ridge": KernelRidge(),
            # "ada boost regressor": AdaBoostRegressor(),
            # "bagging regressor": BaggingRegressor(),
            # "extra trees regressor": ExtraTreesRegressor(n_estimators=10),
            # "dummy regressor": DummyRegressor(),
            # "PLSR regressor": PLSRegression(),
            # "radius neighbours regressor": RadiusNeighborsRegressor(radius=5),
            # "neural_network.MLPRegressor 500": MLPRegressor(hidden_layer_sizes=(50)),
            # "svm.SVR": SVR(gamma="scale"),
            "svm.NuSVR epsilon=": NuSVR(nu=0.7, gamma="scale")
            # "svm.LinearSVR epsilom=": LinearSVR(max_iter=10000)
            # "decision tree regressor": DecisionTreeRegressor(),
            # "extra tree regressor": ExtraTreeRegressor()
        }

# models = {
#             "1":MLPRegressor(hidden_layer_sizes=(64,2), solver="adam"),
#             "2":MLPRegressor(hidden_layer_sizes=(64,2), solver="lbfgs"),
#         }

# cp(t, ...) appears to be a progress/timing helper defined elsewhere in the source
cp(t, "initialising models")

results = []

rand = [0,0]
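
# A minimal sketch of how a models dict like the one above is typically
# consumed; X_train/X_test/y_train/y_test and the metric are assumptions,
# not part of the original snippet:
from sklearn.metrics import mean_squared_error

for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    results.append((name, mean_squared_error(y_test, pred)))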
Example #3
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import validation_curve


# Load the data and slice out feature and target columns
csv = np.genfromtxt('data.csv', delimiter=",")
ftest = csv[:, 0]
ftrain = csv[:, 1]
Xtest = csv[:, 2:21]
Xtrain = csv[:, 21:40]
ytest = csv[:, 40]
ytrain = csv[:, 41]

#classifier = Ridge(alpha=1.5)
#classifier=SVR(gamma='scale', C=1.5, epsilon=0.2)
classifier = NuSVR(gamma='scale', C=1.5, nu=0.1)
classifier.fit(Xtrain, ytrain)
prediction = classifier.predict(Xtest)
sqerror = (prediction - ytest) ** 2
meansquareerror = np.mean(sqerror)
# print(meansquareerror)
score = cross_validate(classifier, Xtrain, ytrain, scoring='neg_mean_squared_error', cv=16, return_train_score=False)
# print(score)
accuracy = cross_val_score(estimator=classifier, X=Xtrain, y=ytrain, cv=10)
np.random.seed(0)
temp = np.arange(ytrain.shape[0])
np.random.shuffle(temp)
Xtrain, ytrain = Xtrain[temp], ytrain[temp]
# param_name/param_range are keyword-only in recent scikit-learn versions
train_score, valid_score = validation_curve(SVR(), Xtrain, ytrain, param_name="gamma",
                                            param_range=np.logspace(-1, 3, 3), cv=8)
#print(train_score)
#print(valid_score)
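
# validation_curve returns arrays of shape (len(param_range), n_folds);
# averaging over folds gives one train/validation score per gamma value
# (a short sketch, not part of the original):
gammas = np.logspace(-1, 3, 3)
for g, tr, va in zip(gammas, train_score.mean(axis=1), valid_score.mean(axis=1)):
    print("gamma=%g: train=%.3f valid=%.3f" % (g, tr, va))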
Example #4
def getModel(config, modelname):
    info("Getting {0} Model".format(modelname), ind=0)

    problemType = config['problem']
    modelData = getModelData(config, modelname)
    modelParams = modelData.get('params')
    retval = None

    ###########################################################################
    # Classification
    ###########################################################################
    if isClassification(problemType):
        if modelname == "logistic":
            retval = classifier(modelname, LogisticRegression(), modelParams)
        if modelname == "sgd":
            retval = classifier(modelname, SGDClassifier(), modelParams)
        if modelname == "passagg":
            retval = classifier(modelname, PassiveAggressiveClassifier(),
                                modelParams)

        if modelname == "mlp":
            retval = classifier(modelname, MLPClassifier(), modelParams)

        if modelname == "xgboost":
            retval = classifier(modelname, XGBClassifier(), modelParams)

        if modelname == "gaussproc":
            retval = classifier(modelname, GaussianProcessClassifier(),
                                modelParams)

        if modelname == "lda":
            retval = classifier(modelname, LinearDiscriminantAnalysis(),
                                modelParams)
        if modelname == "qda":
            retval = classifier(modelname, QuadraticDiscriminantAnalysis(),
                                modelParams)

        if modelname == "nb":
            retval = classifier(modelname, GaussianNB(), modelParams)
        if modelname == "nbbern":
            retval = classifier(modelname, BernoulliNB(), modelParams)
        if modelname == "nbmulti":
            retval = classifier(modelname, MultinomialNB(), modelParams)

        if modelname == "dtree":
            retval = classifier(modelname, DecisionTreeClassifier(),
                                modelParams)

        if modelname == "kneighbors":
            retval = classifier(modelname, KNeighborsClassifier(), modelParams)
        if modelname == "rneighbors":
            retval = classifier(modelname, RadiusNeighborsClassifier(),
                                modelParams)

        if modelname == "svmlin":
            retval = classifier(modelname, LinearSVC(), modelParams)
        if modelname == "svmnupoly":
            retval = classifier(modelname, NuSVC(), modelParams)
        if modelname == "svmnulinear":
            retval = classifier(modelname, NuSVC(), modelParams)
        if modelname == "svmnusigmoid":
            retval = classifier(modelname, NuSVC(), modelParams)
        if modelname == "svmnurbf":
            retval = classifier(modelname, NuSVC(), modelParams)
        if modelname == "svmepspoly":
            retval = classifier(modelname, SVC(), modelParams)
        if modelname == "svmepslinear":
            retval = classifier(modelname, SVC(), modelParams)
        if modelname == "svmepssigmoid":
            retval = classifier(modelname, SVC(), modelParams)
        if modelname == "svmepsrbf":
            retval = classifier(modelname, SVC(), modelParams)

        if modelname == "rf":
            retval = classifier(modelname, RandomForestClassifier(),
                                modelParams)
        if modelname == "extratrees":
            retval = classifier(modelname, ExtraTreesClassifier(), modelParams)
        if modelname == "adaboost":
            retval = classifier(modelname, AdaBoostClassifier(), modelParams)
        if modelname == "gbm":
            retval = classifier(modelname, GradientBoostingClassifier(),
                                modelParams)

        if modelname == "tpot":
            retval = classifier(modelname, TPOTClassifier(), modelParams)

        #######################################################################
        # External classifiers
        #######################################################################
        if modelname == "lightning":
            retval = external.extlightning.createLightningClassifier(
                modelParams)

    ###########################################################################
    # Regression
    ###########################################################################
    if isRegression(problemType):
        if modelname == "linear":
            retval = classifier(modelname, LinearRegression(), modelParams)
        if modelname == "ridge":
            retval = classifier(modelname, Ridge(), modelParams)
        if modelname == "lasso":
            retval = classifier(modelname, Lasso(), modelParams)
        if modelname == "elasticnet":
            retval = classifier(modelname, ElasticNet(), modelParams)
        if modelname == "omp":
            retval = classifier(modelname, OrthogonalMatchingPursuit(),
                                modelParams)
        if modelname == "bayesridge":
            retval = classifier(modelname, BayesianRidge(), modelParams)
        if modelname == "ard":
            retval = classifier(modelname, ARDRegression(), modelParams)
        if modelname == "sgd":
            retval = classifier(modelname, SGDRegressor(), modelParams)
        if modelname == "passagg":
            retval = classifier(modelname, PassiveAggressiveRegressor(),
                                modelParams)
        if modelname == "perceptron":
            retval = None
        if modelname == "huber":
            retval = classifier(modelname, HuberRegressor(), modelParams)
        if modelname == "theilsen":
            retval = classifier(modelname, TheilSenRegressor(), modelParams)
        if modelname == "ransac":
            retval = classifier(modelname, RANSACRegressor(), modelParams)

        if modelname == "mlp":
            retval = classifier(modelname, MLPRegressor(), modelParams)

        if modelname == "xgboost":
            retval = classifier(modelname, XGBRegressor(), modelParams)

        if modelname == "gaussproc":
            retval = classifier(modelname, GaussianProcessRegressor(),
                                modelParams)

        if modelname == "dtree":
            retval = classifier(modelname, DecisionTreeRegressor(),
                                modelParams)

        if modelname == "kneighbors":
            retval = classifier(modelname, KNeighborsRegressor(), modelParams)
        if modelname == "rneighbors":
            retval = classifier(modelname, RadiusNeighborsRegressor(),
                                modelParams)

        if modelname == "svmlin":
            retval = classifier(modelname, LinearSVR(), modelParams)
        if modelname == "svmnupoly":
            retval = classifier(modelname, NuSVR(), modelParams)
        if modelname == "svmnulinear":
            retval = classifier(modelname, NuSVR(), modelParams)
        if modelname == "svmnusigmoid":
            retval = classifier(modelname, NuSVR(), modelParams)
        if modelname == "svmnurbf":
            retval = classifier(modelname, NuSVR(), modelParams)
        if modelname == "svmepspoly":
            retval = classifier(modelname, SVR(), modelParams)
        if modelname == "svmepslinear":
            retval = classifier(modelname, SVR(), modelParams)
        if modelname == "svmepssigmoid":
            retval = classifier(modelname, SVR(), modelParams)
        if modelname == "svmepsrbf":
            retval = classifier(modelname, SVR(), modelParams)

        if modelname == "rf":
            retval = classifier(modelname, RandomForestRegressor(),
                                modelParams)
        if modelname == "extratrees":
            retval = classifier(modelname, ExtraTreesRegressor(), modelParams)
        if modelname == "adaboost":
            retval = classifier(modelname, AdaBoostRegressor(), modelParams)
        if modelname == "gbm":
            retval = classifier(modelname, GradientBoostingRegressor(),
                                modelParams)

        if modelname == "isotonic":
            retval = classifier(modelname, IsotonicRegression(), modelParams)

        if modelname == "earth":
            retval = classifier(modelname, Earth(), modelParams)

        if modelname == "symbolic":
            retval = classifier(modelname, SymbolicRegressor(), modelParams)

        if modelname == "tpot":
            retval = classifier(modelname, TPOTRegressor(), modelParams)

    if retval is None:
        raise ValueError(
            "No model with name {0} was created".format(modelname))

    model = retval.get()

    return model
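
# The if-chains above can be collapsed into lookup tables. A minimal sketch of
# the same dispatch for a few of the regression entries (classifier() and
# modelParams are the helpers used in getModel above):
REGRESSORS = {
    "linear": LinearRegression,
    "ridge": Ridge,
    "elasticnet": ElasticNet,
    "svmnurbf": NuSVR,
    "rf": RandomForestRegressor,
}

def getRegressor(modelname, modelParams):
    cls = REGRESSORS.get(modelname)
    if cls is None:
        raise ValueError("No model with name {0} was created".format(modelname))
    return classifier(modelname, cls(), modelParams)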
Example #5
print(housing.shape)

store_pkl(housing_mapper, "Housing.pkl")

housing_X = housing[:, 0:13]
housing_y = housing[:, 13]


def build_housing(regressor, name, to_sparse=False):
    if (to_sparse):
        regressor = regressor.fit(sparse.csr_matrix(housing_X), housing_y)
    else:
        regressor = regressor.fit(housing_X, housing_y)
    store_pkl(regressor, name + ".pkl")
    medv = DataFrame(regressor.predict(housing_X), columns=["MEDV"])
    store_csv(medv, name + ".csv")


build_housing(
    MLPRegressor(activation="tanh",
                 hidden_layer_sizes=(26, ),
                 solver="lbfgs",  # "algorithm" was renamed to "solver" in scikit-learn 0.18
                 random_state=13,
                 tol=0.001,
                 max_iter=1000), "MLPHousing")
build_housing(SGDRegressor(random_state=13), "SGDHousing")
build_housing(SVR(), "SVRHousing", to_sparse=True)
build_housing(LinearSVR(random_state=13), "LinearSVRHousing", to_sparse=True)
build_housing(NuSVR(), "NuSVRHousing", to_sparse=True)
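
# store_pkl and store_csv are not defined in this fragment; a plausible
# minimal implementation, assuming joblib-style pickling and pandas CSV
# output (an assumption, not the original helpers):
import joblib

def store_pkl(obj, name):
    joblib.dump(obj, name)

def store_csv(df, name):
    df.to_csv(name, index=False)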
Example #6
    def _test_evaluation(self, allow_slow):
        """
        Test that the converted model makes the same predictions as the original
        """

        # Generate some smallish (some kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ["x1", "x2"]
        df = pd.DataFrame(x, columns=input_names)

        # Parameters to test
        kernel_parameters = [
            {},
            {"kernel": "rbf", "gamma": 1.2},
            {"kernel": "linear"},
            {"kernel": "poly"},
            {"kernel": "poly", "degree": 2},
            {"kernel": "poly", "gamma": 0.75},
            {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2},
            {"kernel": "sigmoid"},
            {"kernel": "sigmoid", "gamma": 1.3},
            {"kernel": "sigmoid", "coef0": 0.8},
            {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5},
        ]
        non_kernel_parameters = [
            {},
            {"C": 1},
            {"C": 1.5, "shrinking": True},
            {"C": 0.5, "shrinking": False, "nu": 0.9},
        ]

        # Test
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)

                cur_model = NuSVR(**cur_params)
                cur_model.fit(x, y)
                df["prediction"] = cur_model.predict(x)

                spec = scikit_converter.convert(cur_model, input_names,
                                                "target")

                if _is_macos() and _macos_version() >= (10, 13):
                    metrics = evaluate_regressor(spec, df)
                    self.assertAlmostEqual(metrics["max_error"], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
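
# The nested loops above enumerate the cartesian product of the two
# parameter lists; an equivalent formulation (a sketch):
#
#     import itertools
#     for param1, param2 in itertools.product(non_kernel_parameters,
#                                             kernel_parameters):
#         cur_params = {**param1, **param2}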
Example #7
# Create the data that we will plot
pdp_goals = pdp.pdp_isolate(model=rfc_model,
                            dataset=val_X,
                            model_features=X.columns,
                            feature='max')

# plot it
pdp.pdp_plot(pdp_goals, 'max')
plt.show()
# Create the data that we will plot
pdp_goals = pdp.pdp_isolate(model=rfc_model,
                            dataset=val_X,
                            model_features=X.columns,
                            feature='min')

# plot it
pdp.pdp_plot(pdp_goals, 'min')
plt.show()
svm = NuSVR()
svm.fit(X_train_scaled, y_train.values.flatten())
y_pred_svm = svm.predict(X_train_scaled)
score = mean_absolute_error(y_train.values.flatten(), y_pred_svm)  # in-sample MAE
print(f'Score: {score:0.3f}')
folds = KFold(n_splits=5, shuffle=True, random_state=42)
params = {
    'objective': "regression",
    'boosting': "gbdt",
    'metric': "mae",
    'boost_from_average': "false",
    'num_threads': 8,
    'learning_rate': 0.001,
    'num_leaves': 52,
    'max_depth': -1,
    'tree_learner': "serial",
}
Example #8
    def createModel(self):
        if self.checkErrors():
            return
        gamma_choice = self.gamma_choice.get()
        kernels = ["linear", "rbf", "poly", "sigmoid"]
        kernel = kernels[self.kernel_type_var.get()]
        
        do_forecast = self.do_forecast_option.get()
        val_option = self.validation_option.get()
        
        X, y = self.getData()
        X: np.ndarray
        y: np.ndarray

        if self.grid_option_var.get() == 0:
            epsilon = float(self.parameters[0].get())
            nu = float(self.parameters[1].get())
            C = 2 ** float(self.parameters[2].get())
            gamma = 2 ** float(self.parameters[3].get()) if gamma_choice == 2 else "auto" if gamma_choice == 1 else "scale"
            coef0 = float(self.parameters[4].get())
            degree = float(self.parameters[5].get())
            
            if self.model_type_var.get() == 0:
                model = SVR(kernel=kernel, C=C, epsilon=epsilon, gamma=gamma, coef0=coef0, degree=degree)
            else:
                model = NuSVR(kernel=kernel, C=C, nu=nu, gamma=gamma, coef0=coef0, degree=degree)

            if val_option == 0:
                model.fit(X, y)
                if do_forecast == 0:
                    pred = model.predict(X).reshape(-1)
                    if self.scale_var.get() != "None":
                        pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                        y = self.label_scaler.inverse_transform(y.reshape(-1,1)).reshape(-1) # type: ignore
                    losses = loss(y, pred)[:-1]
                    self.y_test = y
                    self.pred = pred
                    for i,j in enumerate(losses):
                        self.test_metrics_vars[i].set(j)
                self.model = model # type: ignore
            
            elif val_option == 1:
                if do_forecast == 0:
                    X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=self.random_percent_var.get()/100)
                    model.fit(X_train, y_train)
                    pred = model.predict(X_test).reshape(-1)
                    if self.scale_var.get() != "None":
                        pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                        y_test = self.label_scaler.inverse_transform(y_test.reshape(-1,1)).reshape(-1) # type: ignore
                    losses = loss(y_test, pred)[:-1]
                    self.y_test = y_test
                    self.pred = pred
                    for i,j in enumerate(losses):
                        self.test_metrics_vars[i].set(j)
                else:
                    size = int((self.random_percent_var.get()/100)*len(X))
                    X = X[-size:]
                    y = y[-size:]
                    model.fit(X, y)
                self.model = model # type: ignore

            elif val_option == 2:
                if do_forecast == 0:
                    cvs = cross_validate(model, X, y, cv=self.cross_val_var.get(), scoring=skloss)
                    for i,j in enumerate(list(cvs.values())[2:]):
                        self.test_metrics_vars[i].set(j.mean())

            elif val_option == 3:
                if do_forecast == 0:
                    cvs = cross_validate(model, X, y, cv=X.shape[0]-1, scoring=skloss)
                    for i,j in enumerate(list(cvs.values())[2:]):
                        self.test_metrics_vars[i].set(j.mean())
            
        else:
            params = {}
            interval = self.interval_var.get()
             
            params["C"] = np.unique(np.logspace(float(self.optimization_parameters[2][0].get()), float(self.optimization_parameters[2][1].get()), interval, base=2))
            if self.model_type_var.get() == 0:
                params["epsilon"] = np.unique(np.linspace(float(self.optimization_parameters[0][0].get()), float(self.optimization_parameters[0][1].get()), interval))
                model = SVR()
            else:
                min_nu = max(0.0001, float(self.optimization_parameters[1][0].get()))
                max_nu = min(1, float(self.optimization_parameters[1][1].get()))
                params["nu"] = np.unique(np.linspace(min_nu, max_nu, interval))
                model = NuSVR()
            if kernel != "linear":
                if gamma_choice == 2:
                    params["gamma"] = np.unique(np.logspace(float(self.optimization_parameters[3][0].get()), float(self.optimization_parameters[3][1].get()), interval, base=2))
                elif gamma_choice == 1:
                    params["gamma"] = ["auto"]
                else:
                    params["gamma"] = ["scale"]
            
            if kernel == "poly" or kernel == "sigmoid":
                params["coef0"] = np.unique(np.linspace(float(self.optimization_parameters[4][0].get()), float(self.optimization_parameters[4][1].get()), interval))

            if kernel == "poly":
                params["degree"] = np.unique(np.linspace(float(self.optimization_parameters[5][0].get()), float(self.optimization_parameters[5][1].get()), interval, dtype=int))

            params["kernel"] = [kernel]

            cv = self.gs_cross_val_var.get() if self.gs_cross_val_option.get() == 1 else None
            
            regressor = GridSearchCV(model, params, cv=cv)
            
            if val_option == 0:
                regressor.fit(X, y)
                if do_forecast == 0:
                    pred = regressor.predict(X)
                    if self.scale_var.get() != "None":
                        pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                        y = self.label_scaler.inverse_transform(y.reshape(-1,1)).reshape(-1) # type: ignore
                    losses = loss(y, pred)[:-1]
                    self.y_test = y
                    self.pred = pred
                    for i,j in enumerate(losses):
                        self.test_metrics_vars[i].set(j)
                self.model = regressor.best_estimator_

            elif val_option == 1:
                if do_forecast == 0:
                    X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=self.random_percent_var.get()/100)
                    regressor.fit(X_train, y_train)
                    pred = regressor.predict(X_test)
                    if self.scale_var.get() != "None":
                        pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                        y_test = self.label_scaler.inverse_transform(y_test.reshape(-1,1)).reshape(-1) # type: ignore
                    losses = loss(y_test, pred)[:-1]
                    self.y_test = y_test
                    self.pred = pred
                    for i,j in enumerate(losses):
                        self.test_metrics_vars[i].set(j)
                else:
                    size = int((self.random_percent_var.get()/100)*len(X))
                    X = X[-size:]
                    y = y[-size:]
                    regressor.fit(X, y)
                self.model = regressor.best_estimator_
            
            popupmsg("Best Params: " + str(self.model.get_params()))
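
# The C and gamma grids above are built on a log2 scale: the user supplies
# exponents, and np.logspace(lo, hi, interval, base=2) expands them to
# 2**lo .. 2**hi. For example (hypothetical bounds -5..5, interval=11):
#
#     C_grid = np.unique(np.logspace(-5, 5, 11, base=2))   # 2**-5 ... 2**5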
Example #9
    def _fit_transform(self, Xtr, Ytr, n_drop = 100, regression_method = 'linear', regression_parameters = None, embedding = 'identity', n_dim = 3, embedding_parameters = None):
        n_data, dim_data = Xtr.shape
        _, dim_output = Ytr.shape

        self._dim_output = dim_output

        # If this is the first time the network is tuned, set the input and feedback weights.
        # The weights are dense and uniformly distributed in [-1.0, 1.0]
        if (self._input_weights is None):
            self._input_weights = 2.0*np.random.rand(self._n_internal_units, dim_data) - 1.0

        if (self._feedback_weights is None):
            self._feedback_weights = 2.0*np.random.rand(self._n_internal_units, dim_output) - 1.0

        # Initialize regression method
        if (regression_method == 'nusvr'):
            # NuSVR, RBF kernel
            C, nu, gamma = regression_parameters
            self._regression_method = NuSVR(C = C, nu = nu, gamma = gamma)

        elif (regression_method == 'linsvr'):
            # Linear SVR (replaces an earlier NuSVR-with-linear-kernel variant)
            C, epsilon = regression_parameters
            self._regression_method = LinearSVR(C = C, epsilon = epsilon)

        elif (regression_method == 'enet'):
            # Elastic net
            alpha, l1_ratio = regression_parameters
            self._regression_method = ElasticNet(alpha = alpha, l1_ratio = l1_ratio)

        elif (regression_method == 'ridge'):
            # Ridge regression
            self._regression_method = Ridge(alpha = regression_parameters)

        elif (regression_method == 'lasso'):
            # LASSO
            self._regression_method = Lasso(alpha = regression_parameters)

        elif (regression_method == 'bayeridge'):
            lambda_1, lambda_2, alpha_1, alpha_2 = regression_parameters
            self._regression_method = BayesianRidge(lambda_1=lambda_1,lambda_2=lambda_2,alpha_1=alpha_1,alpha_2=alpha_2)

        elif (regression_method == 'gpr'):
            self._regression_method = GaussianProcessRegressor()

        else:
            # Use canonical linear regression
            self._regression_method = LinearRegression()

        # Initialize embedding method
        if (embedding == 'identity'):
            self._embedding_dimensions = self._n_internal_units
        else:
            self._embedding_dimensions = n_dim

            if (embedding == 'kpca'):
                # Kernel PCA with RBF kernel
                self._embedding_method = KernelPCA(n_components = n_dim, kernel = 'rbf', gamma = embedding_parameters)

            elif (embedding == 'pca'):
                # PCA
                self._embedding_method = PCA(n_components = n_dim)

            elif (embedding == 'fa'):
                # Factor analysis
                self._embedding_method = FactorAnalysis(n_components = n_dim)

            elif (embedding == 'spca'):
                # Sparse PCA
                self._embedding_method = SparsePCA(n_components = n_dim, alpha = embedding_parameters)

            elif (embedding == 'ipca'):
                # Incremental PCA
                self._embedding_method = IncrementalPCA(n_components = n_dim)

            elif (embedding == 'tsvd'):
                # Truncated SVD (n_components must be smaller than the state size)
                if n_dim >= self._n_internal_units:
                    self._embedding_method = TruncatedSVD(n_components = self._n_internal_units-1)
                else:
                    self._embedding_method = TruncatedSVD(n_components = n_dim)

            elif (embedding == 'wpca'):
                # Weighted PCA
                self._embedding_method = WPCA(n_components=n_dim)

            elif (embedding == 'rpca'):
                # Robust PCA
                self._embedding_method = RobustPCA.RobustPCA()

            elif (embedding == 'tga'):
                # Trimmed Grassmann Average (a robust PCA variant)
                self._embedding_method = tga.TGA(n_components=n_dim, random_state=1)

            elif (embedding == 'empca'):
                # Expectation Maximization PCA
                self._embedding_method = EMPCA(n_components=n_dim)

            elif (embedding == 'mds'):
                # Multi-Dimensional Scaling (MDS)
                self._embedding_method = MDS(n_components=n_dim)

            elif (embedding == 'ica'):
                # Independent component analysis (FastICA); alpha is currently unused
                alpha = embedding_parameters
                self._embedding_method = FastICA.FastICA(n_components=n_dim)
                #self._embedding_method = FastICA.FastICA(n_components=n_dim, fun_args={'alpha':alpha})
                #self._embedding_method = FastICA.FastICA(n_components = n_dim, algorithm = 'deflation')

            else:
                raise ValueError("Unknown embedding method")

        # Calculate states/embedded states.
        # Note: If the embedding is 'identity', embedded states will be equal to the states.
        states, embedded_states,_ = self._compute_state_matrix(X = Xtr, Y = Ytr, n_drop = n_drop)

        # Train output
        if regression_method == 'rvr':  # compare the method-name string, not the fitted estimator
            np.savetxt('/home/minh/Desktop/vb_linear/input_rvr',
                       np.concatenate((embedded_states, self._scaleshift(Xtr[n_drop:,:], self._input_scaling, self._input_shift)), axis=1),delimiter=',')
            np.savetxt('/home/minh/Desktop/vb_linear/output_rvr',
                       self._scaleshift(Ytr[n_drop:,:], self._teacher_scaling, self._teacher_shift).flatten(),delimiter=',')
            subprocess.call("~/PycharmProjects/MultivariateESN/run_rvr.sh",shell=True)
            print('end run_rvr!')
        else:
            self._regression_method.fit(np.concatenate((embedded_states, self._scaleshift(Xtr[n_drop:, :], self._input_scaling, self._input_shift)), axis=1),
                                        self._scaleshift(Ytr[n_drop:, :], self._teacher_scaling,self._teacher_shift).flatten())

        return states, embedded_states
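
# Typical call pattern for _fit_transform above (shapes and parameter values
# are assumptions): Xtr has shape (n_samples, dim_data), Ytr has shape
# (n_samples, dim_output), and regression_parameters matches the chosen
# method, e.g. (C, nu, gamma) for 'nusvr':
#
#     states, embedded = esn._fit_transform(Xtr, Ytr, n_drop=100,
#                                           regression_method='nusvr',
#                                           regression_parameters=(1.0, 0.5, 0.1))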
Example #10
    build_housing(
        AdaBoostRegressor(DecisionTreeRegressor(min_samples_leaf=5),
                          random_state=13,
                          n_estimators=17), "AdaBoostHousing")
    build_housing(BayesianRidge(), "BayesianRidgeHousing")
    build_housing(KNeighborsRegressor(), "KNNHousing", with_kneighbors=True)
    build_housing(
        MLPRegressor(activation="tanh",
                     hidden_layer_sizes=(26, ),
                     solver="lbfgs",
                     random_state=13,
                     tol=0.001,
                     max_iter=1000), "MLPHousing")
    build_housing(SGDRegressor(random_state=13), "SGDHousing")
    build_housing(SVR(), "SVRHousing")
    build_housing(LinearSVR(random_state=13), "LinearSVRHousing")
    build_housing(NuSVR(), "NuSVRHousing")

#
# Anomaly detection
#


def build_iforest_housing(iforest, name, **pmml_options):
    mapper = DataFrameMapper([(housing_X.columns.values, ContinuousDomain())])
    pipeline = Pipeline([("mapper", mapper), ("estimator", iforest)])
    pipeline.fit(housing_X)
    pipeline = make_pmml_pipeline(pipeline, housing_X.columns.values)
    pipeline.configure(**pmml_options)
    store_pkl(pipeline, name + ".pkl")
    decisionFunction = DataFrame(pipeline.decision_function(housing_X),
                                 columns=["decisionFunction"])
Example #11
correctedSeries = util.detectAndRemoveOutliers(resampledSeries)

# Step 3 - Scale the series
correctedSeries = util.scaleSeriesStandard(correctedSeries)


# Divide the series into training and testing series
trainingSeries, testingSeries = util.splitIntoTrainingAndTestingSeries(correctedSeries, horizon)

# Learning Process - Start

# Form the feature and target vectors
featureVectors, targetVectors = formFeatureAndTargetVectors(trainingSeries)

# Fit a model
model = NuSVR(kernel="rbf", gamma=1.0, nu=1.0, tol=1e-15)
model.fit(featureVectors, targetVectors[:, 0])

# Learning Process - End

# Predict for testing data points
testingFeatureVectors, testingTargetVectors = formFeatureAndTargetVectors(testingSeries)
predictedTrainingOutputData = model.predict(testingFeatureVectors)

# Predicted and actual Series
actualSeries = testingSeries
predictedSeries = pd.Series(data=predictedTrainingOutputData.flatten(), index=testingSeries.index)

# Prediction Process - End

# Step 5 - Descale the series
Example #12
models = [
    PLSRegression(n_components=1),
    PLSRegression(n_components=2),
    PLSRegression(n_components=3),
    PLSRegression(n_components=4),
    Lasso(alpha=5),
    Lasso(alpha=2),
    Lasso(alpha=1),
    Lasso(alpha=0.2),
    LassoLars(alpha=1),
    LassoLars(alpha=0.1),
    LassoLars(alpha=0.01),
    LassoLars(alpha=0.001),
    LassoLars(alpha=0.0003),
    Ridge(alpha=0.01, max_iter=5000),
    Ridge(alpha=0.001, max_iter=5000),
    Ridge(alpha=0.0001, max_iter=5000),
    Ridge(alpha=0.00001, max_iter=5000),
    Lars(),
    SVR(gamma='auto'),
    LinearSVR(max_iter=10000),
    NuSVR(gamma='auto'),
    LogisticRegression(solver='lbfgs'),
    LinearRegression(),
    KernelRidge()
]

model_names = [
    "PLS 1-component", "PLS 2-component", "PLS 3-component", "PLS 4-component",
    "Lasso alpha 5", "Lasso alpha 2", "Lasso alpha 1", "Lasso alpha 0.2",
    "LassoLars alpha 1", "LassoLars alpha 0.1", "LassoLars alpha 0.01",
    "LassoLars alpha 0.001", "LassoLars alpha 0.0003", "Ridge alpha 0.01",
    "Ridge alpha 0.001", "Ridge alpha 0.0001", "Ridge alpha 0.00001", "Lars",
    "SVR", "LinearSVR", "NuSVR", "LogisticRegression", "LinearRegression",
    "Kernel Ridge"
]
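
# A minimal sketch of how models and model_names are typically consumed
# together (X, y and the scoring choice are assumptions):
from sklearn.model_selection import cross_val_score

for name, est in zip(model_names, models):
    scores = cross_val_score(est, X, y, cv=5, scoring="neg_mean_squared_error")
    print("%s: MSE=%.4f" % (name, -scores.mean()))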
Example #13
@pytest.mark.parametrize(['clf'], [
    [LogisticRegression(random_state=42)],
    [SGDClassifier(**SGD_KWARGS)],
    [SVC(kernel='linear', random_state=42)],
    [NuSVC(kernel='linear', random_state=42)],
])
def test_explain_linear_binary(newsgroups_train_binary, clf):
    assert_explained_weights_linear_classifier(newsgroups_train_binary,
                                               clf,
                                               binary=True)


@pytest.mark.parametrize(['clf'], [
    [SVC()],
    [NuSVC()],
    [SVR()],
    [NuSVR()],
])
def test_explain_linear_unsupported_kernels(clf):
    res = explain_weights(clf)
    assert 'supported' in res.error


@pytest.mark.parametrize(['clf'], [
    [SVC(kernel='linear')],
    [NuSVC(kernel='linear')],
])
def test_explain_linear_unsupported_multiclass(clf, newsgroups_train):
    docs, y, target_names = newsgroups_train
    vec = TfidfVectorizer()
    clf.fit(vec.fit_transform(docs), y)
    expl = explain_weights(clf, vec=vec)
Example #14
def all_regressor_models():
    models = []
    metrix = []
    train_accuracy = []
    test_accuracy = []
    models.append(('LinearRegression', LinearRegression()))
    models.append(('DecisionTreeRegressor', DecisionTreeRegressor()))
    models.append(('RandomForestRegressor', RandomForestRegressor()))
    models.append(('BaggingRegressor', BaggingRegressor()))
    models.append(('GradientBoostingRegressor', GradientBoostingRegressor()))
    models.append(('AdaBoostRegressor', AdaBoostRegressor()))
    models.append(('SVR', SVR()))
    models.append(('KNeighborsRegressor', KNeighborsRegressor()))    
    models.append(('ARDRegression', ARDRegression()))
    models.append(('BayesianRidge', BayesianRidge()))
    models.append(('ElasticNet', ElasticNet()))
    models.append(('ElasticNetCV', ElasticNetCV()))
    models.append(('Lars', Lars()))
    models.append(('LassoCV', LassoCV()))
    models.append(('LassoLars', LassoLars()))
    models.append(('LassoLarsCV', LassoLarsCV()))
    models.append(('MultiTaskElasticNet', MultiTaskElasticNet()))
    models.append(('MultiTaskLasso', MultiTaskLasso()))
    models.append(('MultiTaskLassoCV', MultiTaskLassoCV()))
    models.append(('OrthogonalMatchingPursuit', OrthogonalMatchingPursuit()))
    models.append(('OrthogonalMatchingPursuitCV', OrthogonalMatchingPursuitCV()))
    models.append(('PassiveAggressiveRegressor', PassiveAggressiveRegressor()))
    models.append(('RANSACRegressor', RANSACRegressor()))
    models.append(('Ridge', Ridge()))
    models.append(('RidgeCV', RidgeCV()))
    models.append(('SGDRegressor', SGDRegressor()))
    models.append(('TheilSenRegressor', TheilSenRegressor()))
    models.append(('TransformedTargetRegressor', TransformedTargetRegressor()))
    models.append(('LinearSVR', LinearSVR()))
    models.append(('NuSVR', NuSVR()))
    models.append(('MLPRegressor', MLPRegressor()))
    models.append(('CCA', CCA()))
    models.append(('PLSRegression', PLSRegression()))
    models.append(('PLSCanonical', PLSCanonical()))
    models.append(('GaussianProcessRegressor', GaussianProcessRegressor()))
    models.append(('GradientBoostingRegressor', GradientBoostingRegressor()))
    models.append(('HistGradientBoostingRegressor', HistGradientBoostingRegressor()))
    estimators = [('lr', RidgeCV()),('svr', LinearSVR(random_state=42))]
    models.append(('StackingRegressor', StackingRegressor(estimators=estimators,final_estimator=RandomForestRegressor(n_estimators=10,random_state=42))))
    r1 = LinearRegression()
    r2 = RandomForestRegressor(n_estimators=10, random_state=1)
    models.append(('VotingRegressor', VotingRegressor([('lr', r1), ('rf', r2)])))
    models.append(('ExtraTreesRegressor', ExtraTreesRegressor()))
    models.append(('IsotonicRegression', IsotonicRegression()))
    models.append(('KernelRidge', KernelRidge()))
    models.append(('RadiusNeighborsRegressor', RadiusNeighborsRegressor()))
    test_acc = []
    names = []
    for name, model in models:
        try:
            m = model
            m.fit(X_train, y_train)
            y_pred = m.predict(X_test)
            r_square = r2_score(y_test,y_pred)
            rmse = np.sqrt(mean_squared_error(y_test,y_pred))
            test_acc.append(r_square)
            names.append(name)            
            #print(name," ( r_square , rmse) is: ", r_square, rmse)
            metrix.append((name, r_square, rmse))
        except Exception:
            print("Exception occurred:", name)
    return metrix, test_acc, names
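
# Example usage (assumes X_train/X_test/y_train/y_test exist in the
# enclosing scope, as the function body itself does):
metrix, test_acc, names = all_regressor_models()
for name, r_square, rmse in metrix:
    print("%s: R^2=%.3f RMSE=%.3f" % (name, r_square, rmse))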
Example #15
    def test_convert_nusvr_default(self):
        model, X = self._fit_binary_classification(NuSVR())
        model_onnx = convert_sklearn(
            model, "SVR", [("input", FloatTensorType([1, X.shape[1]]))])
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(X, model, model_onnx, basename="SklearnRegNuSVR2")