def train(self, cvs, init_params=None, FS=False, inner_jobs=1):
    """Train this wrapper's sklearn-style estimator on a single CV split.

    Parameters
    ----------
    cvs : sequence
        cvs[0] is a 6-tuple (X_train, y_train, X_val, y_val, X_test, y_test)
        of numpy arrays — assumed from the indexing below; confirm against caller.
    init_params : list or None
        Seed individuals for the evolutionary search. Defaults to a fresh
        empty list (was a mutable default argument — fixed).
    FS : bool
        Feature-selection mode: uses a small, fast random-forest search.
    inner_jobs : int
        n_jobs for the FS random forest.

    Returns
    -------
    dict
        ``self.to_dict()`` after fitting, logging and saving the model.
    """
    if init_params is None:
        init_params = []
    print('training with deap...')
    # Pool train/val/test for the final refit after hyper-parameter search.
    X = np.vstack((cvs[0][0], cvs[0][2], cvs[0][4]))
    if len(cvs[0][1].shape) == 1 and len(cvs[0][5].shape) == 1:
        y = np.hstack((cvs[0][1], cvs[0][3], cvs[0][5]))
    else:
        y = np.vstack((cvs[0][1], cvs[0][3], cvs[0][5])).ravel()
    # NOTE(review): D receives n_samples and N n_features here — the names
    # look swapped relative to convention; kept as-is for compatibility.
    self.D, self.N = X.shape
    model_type = self.model_type.lower()

    if 'elasticnet' in model_type:
        X_train = cvs[0][0]
        y_train = cvs[0][1].reshape(-1, 1)
        X_val = cvs[0][2]
        y_val = cvs[0][3].reshape(-1, 1)
        X_test = cvs[0][4]
        y_test = cvs[0][5].reshape(-1, 1)
        # NOTE(review): the test fold is stacked into the training data and
        # then also used for evaluation below — deliberate in the original,
        # but it means acc_test is an in-sample score.
        X_train = np.vstack((X_train, X_val, X_test))
        y_train = np.vstack((y_train, y_val, y_test))
        model = ElasticNetCV(cv=5, max_iter=4000)
        model.fit(X_train, y_train.ravel())
        self.best_params = model.get_params()
        ypred = model.predict(X_test).ravel()
        if self.rated is None:
            # Relative error when no rated capacity is given (MAPE-like).
            self.accuracy = np.mean(np.abs(ypred - y_test.ravel()) / y_test.ravel())
        else:
            self.accuracy = np.mean(np.abs(ypred - y_test.ravel()))
        self.acc_test = self.accuracy
        self.model = model
        self.logger.info('Best params')
        self.logger.info(self.best_params)
        self.logger.info('Final mae %s', str(self.acc_test))
        self.logger.info('Final rms %s', str(self.accuracy))
        self.logger.info('finish train for model %s', self.model_type)
        self.istrained = True
        self.save(self.model_dir)
        return self.to_dict()
    else:
        # Choose the search space, base estimator and GA budget per model type.
        if 'xgb' in model_type:
            params = {'learning_rate': np.logspace(-5, -1, num=6, base=10),
                      'max_depth': np.unique(np.linspace(1, 150, num=50).astype('int')),
                      'colsample_bytree': np.linspace(0.4, 1.0, num=60),
                      'colsample_bynode': np.linspace(0.4, 1.0, num=60),
                      'subsample': np.linspace(0.2, 1.0, num=6),
                      'gamma': np.linspace(0.001, 2, num=20),
                      'reg_alpha': np.linspace(0, 1.0, num=12)}
            model = xgb.XGBRegressor(objective="reg:squarederror", random_state=42)
            ngen = self.static_data['sklearn']['gen']
            npop = self.static_data['sklearn']['pop']
        elif 'rf' in model_type:
            if FS:
                # Feature selection: tiny search (2 generations, 4 individuals).
                params = {
                    'max_depth': [1, 2, 3, 5, 10, 16, 24, 36, 52, 76, 96, 128, 150],
                }
                model = RandomForestRegressor(n_estimators=100, n_jobs=inner_jobs,
                                              random_state=42, max_features=2 / 3)
                ngen = 2
                npop = 4
            else:
                params = {
                    'max_depth': np.unique(np.linspace(1, 130, num=50).astype('int')),
                    'max_features': ['auto', 'sqrt', 'log2', None, 0.8, 0.6, 0.4],
                    'min_samples_leaf': np.unique(
                        np.linspace(1, cvs[0][0].shape[0] - 10, num=50).astype('int')),
                    'min_samples_split': np.unique(
                        np.linspace(2, cvs[0][0].shape[0] - 10, num=50).astype('int')),
                }
                model = RandomForestRegressor(n_estimators=500, random_state=42)
                ngen = self.static_data['sklearn']['gen']
                npop = self.static_data['sklearn']['pop']
        elif model_type == 'svm':
            params = {'C': np.logspace(-2, 3, num=100, base=10),
                      'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                      'gamma': list(np.linspace(0.001, 2, num=100)) + ['scale', 'auto']}
            model = SVR(max_iter=1000000)
            ngen = self.static_data['sklearn']['gen']
            npop = self.static_data['sklearn']['pop']
        elif model_type == 'nusvm':
            params = {'nu': np.linspace(0.01, 0.99, num=10),
                      'C': np.logspace(-1, 5, num=100, base=10),
                      'gamma': np.linspace(0.01, 10, num=100)}
            model = NuSVR(max_iter=1000000)
            ngen = self.static_data['sklearn']['gen']
            npop = self.static_data['sklearn']['pop']
        elif 'mlp' in model_type:
            if not self.is_combine:
                params = {'hidden_layer_sizes': np.linspace(4, 800, num=50).astype('int'),
                          'alpha': np.linspace(1e-5, 1e-1, num=4),
                          }
            else:
                # Combiner MLPs search smaller widths but also the activation.
                params = {'hidden_layer_sizes': np.linspace(4, 250, num=50).astype('int'),
                          'activation': ['identity', 'tanh', 'relu'],
                          'alpha': np.linspace(1e-5, 1e-1, num=4),
                          }
            model = MLPRegressor(max_iter=1000, early_stopping=True)
            ngen = 5
            npop = self.static_data['sklearn']['pop']
        else:
            # Previously an unknown type fell through to a NameError on
            # `params`; fail early with a clear message instead.
            raise ValueError('Unknown model_type %s' % self.model_type)

        if self.path_group is not None:
            # Derive available CPU workers from the shared group bookkeeping.
            ncpus = joblib.load(os.path.join(self.path_group, 'total_cpus.pickle'))
            gpu_status = joblib.load(os.path.join(self.path_group, 'gpu_status.pickle'))
            njobs = int(ncpus - gpu_status)
            cpu_status = njobs
            joblib.dump(cpu_status, os.path.join(self.path_group, 'cpu_status.pickle'))
        else:
            njobs = self.njobs

        cv = EvolutionaryAlgorithmSearchCV(estimator=model, params=params,
                                           scoring='neg_root_mean_squared_error',
                                           cv=3, rated=self.rated, verbose=1,
                                           population_size=npop,
                                           gene_mutation_prob=0.8,
                                           gene_crossover_prob=0.8,
                                           tournament_size=3,
                                           generations_number=ngen,
                                           refit=False,
                                           init_params=init_params,
                                           n_jobs=njobs,
                                           path_group=self.path_group)
        cv.fit(cvs)
        self.best_params = cv.best_params_
        self.accuracy, self.acc_test = self.fit_model1(model, self.best_params, cvs)
        # Refit the winning configuration on the pooled data.
        self.model = model
        self.model.set_params(**self.best_params)
        self.model.fit(X, y.ravel())
        self.logger.info('Best params')
        self.logger.info(self.best_params)
        self.logger.info('Final mae %s', str(self.acc_test))
        self.logger.info('Final rms %s', str(self.accuracy))
        self.logger.info('finish train for model %s', self.model_type)
        self.istrained = True
        self.save(self.model_dir)
        return self.to_dict()
# "Passive Aggressive Regressor ": PassiveAggressiveRegressor(max_iter=100000, tol=0.5), # "random forest regressor": RandomForestRegressor(n_estimators=10), # "gradient boosting regressor": GradientBoostingRegressor(min_samples_leaf=3), # "k nearest neighbiours regressor": KNeighborsRegressor(), # "RANSAC regressor": RANSACRegressor(), "SGD regressor": SGDRegressor(max_iter=100000, tol=0.5), # "kernel ridge": KernelRidge(), # "ada boost regressor": AdaBoostRegressor(), # "bagging regressor": BaggingRegressor(), # "extra trees regressor": ExtraTreesRegressor(n_estimators=10), # "dummy regressor": DummyRegressor(), # "PLSR regressor": PLSRegression(), # "radius neighbours regressor": RadiusNeighborsRegressor(radius=5), # "neural_network.MLPRegressor 500": MLPRegressor(hidden_layer_sizes=(50)), # "svm.SVR": SVR(gamma="scale"), "svm.NuSVR epsilon=": NuSVR(nu=0.7, gamma="scale") # "svm.LinearSVR epsilom=": LinearSVR(max_iter=10000) # "decision tree regressor": DecisionTreeRegressor(), # "extra tree regressor": ExtraTreeRegressor() } # models = { # "1":MLPRegressor(hidden_layer_sizes=(64,2), solver="adam"), # "2":MLPRegressor(hidden_layer_sizes=(64,2), solver="lbfgs"), # } cp(t, "initialising models") results = [] rand = [0,0]
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import validation_curve

# data.csv column layout (assumed from the slicing below — TODO confirm):
# 0: ftest, 1: ftrain, 2-20: Xtest, 21-39: Xtrain, 40: ytest, 41: ytrain
csv = np.genfromtxt('data.csv', delimiter=",")
ftest = csv[:, 0]
ftrain = csv[:, 1]
Xtest = csv[:, 2:21]
Xtrain = csv[:, 21:40]
ytest = csv[:, 40]
ytrain = csv[:, 41]

# Fit a Nu-SVR and score it on the held-out columns.
classifier = NuSVR(gamma='scale', C=1.5, nu=0.1)
classifier.fit(Xtrain, ytrain)
prediction = classifier.predict(Xtest)
sqerror = (prediction - ytest) ** 2
meansquareerror = np.mean(sqerror)  # reuse sqerror instead of recomputing
# print(meansquareerror)

score = cross_validate(classifier, Xtrain, ytrain,
                       scoring='neg_mean_squared_error', cv=16,
                       return_train_score=False)
# print(score)
accuracy = cross_val_score(estimator=classifier, X=Xtrain, y=ytrain, cv=10)

# Shuffle the training rows (fixed seed) before the validation curve.
np.random.seed(0)
temp = np.arange(ytrain.shape[0])
np.random.shuffle(temp)
Xtrain, ytrain = Xtrain[temp], ytrain[temp]
# param_name/param_range must be passed as keywords: recent scikit-learn
# versions reject them as positional arguments.
train_score, valid_score = validation_curve(SVR(), Xtrain, ytrain,
                                            param_name="gamma",
                                            param_range=np.logspace(-1, 3, 3),
                                            cv=8)
# print(train_score)
# print(valid_score)
def getModel(config, modelname):
    """Build and return the estimator registered under *modelname*.

    Looks up the model's configured parameters, instantiates the matching
    scikit-learn (or external) estimator for the configured problem type,
    wraps it via ``classifier(...)`` and returns the fitted-ready model.

    Raises ValueError when no estimator matches *modelname*.
    """
    info("Getting {0} Model".format(modelname), ind=0)
    problemType = config['problem']
    modelData = getModelData(config, modelname)
    modelParams = modelData.get('params')
    retval = None

    ###########################################################################
    # Classification
    ###########################################################################
    if isClassification(problemType):
        # name -> estimator class; constructed lazily, only for the match.
        clf_factories = {
            "logistic": LogisticRegression,
            "sgd": SGDClassifier,
            "passagg": PassiveAggressiveClassifier,
            "mlp": MLPClassifier,
            "xgboost": XGBClassifier,
            "gaussproc": GaussianProcessClassifier,
            "lda": LinearDiscriminantAnalysis,
            "qda": QuadraticDiscriminantAnalysis,
            "nb": GaussianNB,
            "nbbern": BernoulliNB,
            "nbmulti": MultinomialNB,
            "dtree": DecisionTreeClassifier,
            "kneighbors": KNeighborsClassifier,
            "rneighbors": RadiusNeighborsClassifier,
            "svmlin": LinearSVC,
            # The four svmnu*/svmeps* variants share a class; the kernel is
            # expected to come from modelParams.
            "svmnupoly": NuSVC,
            "svmnulinear": NuSVC,
            "svmnusigmoid": NuSVC,
            "svmnurbf": NuSVC,
            "svmepspoly": SVC,
            "svmepslinear": SVC,
            "svmepssigmoid": SVC,
            "svmepsrbf": SVC,
            "rf": RandomForestClassifier,
            "extratrees": ExtraTreesClassifier,
            "adaboost": AdaBoostClassifier,
            "gbm": GradientBoostingClassifier,
            "tpot": TPOTClassifier,
        }
        factory = clf_factories.get(modelname)
        if factory is not None:
            retval = classifier(modelname, factory(), modelParams)
        elif modelname == "lightning":
            # External (non-sklearn) estimator with its own construction path.
            retval = external.extlightning.createLightningClassifier(
                modelParams)

    ###########################################################################
    # Regression
    ###########################################################################
    if isRegression(problemType):
        reg_factories = {
            "linear": LinearRegression,
            "ridge": Ridge,
            "lasso": Lasso,
            "elasticnet": ElasticNet,
            "omp": OrthogonalMatchingPursuit,
            "bayesridge": BayesianRidge,
            "ard": ARDRegression,
            "sgd": SGDRegressor,
            "passagg": PassiveAggressiveRegressor,
            "huber": HuberRegressor,
            "theilsen": TheilSenRegressor,
            "ransac": RANSACRegressor,
            "mlp": MLPRegressor,
            "xgboost": XGBRegressor,
            "gaussproc": GaussianProcessRegressor,
            "dtree": DecisionTreeRegressor,
            "kneighbors": KNeighborsRegressor,
            "rneighbors": RadiusNeighborsRegressor,
            "svmlin": LinearSVR,
            "svmnupoly": NuSVR,
            "svmnulinear": NuSVR,
            "svmnusigmoid": NuSVR,
            "svmnurbf": NuSVR,
            "svmepspoly": SVR,
            "svmepslinear": SVR,
            "svmepssigmoid": SVR,
            "svmepsrbf": SVR,
            "rf": RandomForestRegressor,
            "extratrees": ExtraTreesRegressor,
            "adaboost": AdaBoostRegressor,
            "gbm": GradientBoostingRegressor,
            "isotonic": IsotonicRegression,
            "earth": Earth,
            "symbolic": SymbolicRegressor,
            "tpot": TPOTRegressor,
        }
        # "perceptron" is intentionally unsupported for regression: it is
        # absent from the table, so retval stays None and we raise below.
        factory = reg_factories.get(modelname)
        if factory is not None:
            retval = classifier(modelname, factory(), modelParams)

    if retval is None:
        raise ValueError(
            "No model with name {0} was created".format(modelname))

    model = retval.get()
    return model
print(housing.shape)
store_pkl(housing_mapper, "Housing.pkl")

# Boston-housing style layout: first 13 columns are features, column 13 is
# the MEDV target — assumed from the slicing; TODO confirm upstream mapper.
housing_X = housing[:, 0:13]
housing_y = housing[:, 13]

def build_housing(regressor, name, to_sparse=False):
    """Fit *regressor* on the housing data, pickle it, and dump predictions.

    to_sparse: fit on a CSR view of the features instead of the dense array.
    """
    if to_sparse:
        regressor = regressor.fit(sparse.csr_matrix(housing_X), housing_y)
    else:
        regressor = regressor.fit(housing_X, housing_y)
    store_pkl(regressor, name + ".pkl")
    medv = DataFrame(regressor.predict(housing_X), columns=["MEDV"])
    store_csv(medv, name + ".csv")

# BUG FIX: MLPRegressor's parameter is `solver`, not `algorithm` (renamed in
# scikit-learn 0.18); the old keyword raises TypeError on any modern release.
build_housing(
    MLPRegressor(activation="tanh", hidden_layer_sizes=(26, ),
                 solver="lbfgs", random_state=13, tol=0.001, max_iter=1000),
    "MLPHousing")
build_housing(SGDRegressor(random_state=13), "SGDHousing")
build_housing(SVR(), "SVRHousing", to_sparse=True)
build_housing(LinearSVR(random_state=13), "LinearSVRHousing", to_sparse=True)
build_housing(NuSVR(), "NuSVRHousing", to_sparse=True)
def _test_evaluation(self, allow_slow): """ Test that the same predictions are made """ # Generate some smallish (some kernels take too long on anything else) random data x, y = [], [] for _ in range(50): cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2) x.append([cur_x1, cur_x2]) y.append(1 + 2 * cur_x1 + 3 * cur_x2) input_names = ["x1", "x2"] df = pd.DataFrame(x, columns=input_names) # Parameters to test kernel_parameters = [ {}, { "kernel": "rbf", "gamma": 1.2 }, { "kernel": "linear" }, { "kernel": "poly" }, { "kernel": "poly", "degree": 2 }, { "kernel": "poly", "gamma": 0.75 }, { "kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2 }, { "kernel": "sigmoid" }, { "kernel": "sigmoid", "gamma": 1.3 }, { "kernel": "sigmoid", "coef0": 0.8 }, { "kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5 }, ] non_kernel_parameters = [ {}, { "C": 1 }, { "C": 1.5, "shrinking": True }, { "C": 0.5, "shrinking": False, "nu": 0.9 }, ] # Test for param1 in non_kernel_parameters: for param2 in kernel_parameters: cur_params = param1.copy() cur_params.update(param2) cur_model = NuSVR(**cur_params) cur_model.fit(x, y) df["prediction"] = cur_model.predict(x) spec = scikit_converter.convert(cur_model, input_names, "target") if _is_macos() and _macos_version() >= (10, 13): metrics = evaluate_regressor(spec, df) self.assertAlmostEqual(metrics["max_error"], 0) if not allow_slow: break if not allow_slow: break
model_features=X.columns, feature='max') # plot it pdp.pdp_plot(pdp_goals, 'max') plt.show() # Create the data that we will plot pdp_goals = pdp.pdp_isolate(model=rfc_model, dataset=val_X, model_features=X.columns, feature='min') # plot it pdp.pdp_plot(pdp_goals, 'min') plt.show() svm = NuSVR() svm.fit(X_train_scaled, y_train.values.flatten()) y_pred_svm = svm.predict(X_train_scaled) score = mean_absolute_error(y_train.values.flatten(), y_pred_svm) print(f'Score: {score:0.3f}') folds = KFold(n_splits=5, shuffle=True, random_state=42) params = { 'objective': "regression", 'boosting': "gbdt", 'metric': "mae", 'boost_from_average': "false", 'num_threads': 8, 'learning_rate': 0.001, 'num_leaves': 52, 'max_depth': -1, 'tree_learner': "serial",
def createModel(self):
    """Build and fit an (Nu)SVR model from the current GUI widget state.

    Reads kernel/validation/scaling choices from Tk variables, then either
    fits a single model with the entered hyper-parameters or runs a
    GridSearchCV over ranges built from the "optimization" widgets.
    Results are written back into the GUI metric variables and self.model.
    """
    if self.checkErrors():
        return
    gamma_choice = self.gamma_choice.get()  # 0: "scale", 1: "auto", 2: numeric 2**x
    kernels = ["linear", "rbf", "poly", "sigmoid"]
    kernel = kernels[self.kernel_type_var.get()]
    do_forecast = self.do_forecast_option.get()
    val_option = self.validation_option.get()

    X, y = self.getData()
    X: np.ndarray
    y: np.ndarray

    if self.grid_option_var.get() == 0:
        # --- single fit with user-entered hyper-parameters ---
        epsilon = float(self.parameters[0].get())
        nu = float(self.parameters[1].get())
        # C and gamma are entered as log2 exponents.
        C = 2 ** float(self.parameters[2].get())
        gamma = 2 ** float(self.parameters[3].get()) if gamma_choice == 2 else "auto" if gamma_choice == 1 else "scale"
        coef0 = float(self.parameters[4].get())
        # NOTE(review): degree is passed as float; sklearn accepts it but an
        # int is conventional — confirm widget contents.
        degree = float(self.parameters[5].get())
        # model_type_var: 0 -> epsilon-SVR, otherwise Nu-SVR.
        if self.model_type_var.get() == 0:
            model = SVR(kernel=kernel, C=C, epsilon=epsilon, gamma=gamma, coef0=coef0, degree=degree)
        else:
            model = NuSVR(kernel=kernel, C=C, nu=nu, gamma=gamma, coef0=coef0, degree=degree)

        if val_option == 0:
            # Validate on the training data itself.
            model.fit(X, y)
            if do_forecast == 0:
                pred = model.predict(X).reshape(-1)
                if self.scale_var.get() != "None":
                    # Undo label scaling before computing losses.
                    pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                    y = self.label_scaler.inverse_transform(y.reshape(-1,1)).reshape(-1) # type: ignore
                losses = loss(y, pred)[:-1]
                self.y_test = y
                self.pred = pred
                for i,j in enumerate(losses):
                    self.test_metrics_vars[i].set(j)
            self.model = model # type: ignore
        elif val_option == 1:
            # Hold-out validation with a random percentage split.
            if do_forecast == 0:
                X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=self.random_percent_var.get()/100)
                model.fit(X_train, y_train)
                pred = model.predict(X_test).reshape(-1)
                if self.scale_var.get() != "None":
                    pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                    y_test = self.label_scaler.inverse_transform(y_test.reshape(-1,1)).reshape(-1) # type: ignore
                losses = loss(y_test, pred)[:-1]
                self.y_test = y_test
                self.pred = pred
                for i,j in enumerate(losses):
                    self.test_metrics_vars[i].set(j)
            else:
                # Forecast mode: train on the most recent slice only.
                size = int((self.random_percent_var.get()/100)*len(X))
                X = X[-size:]
                y = y[-size:]
                model.fit(X, y)
            self.model = model # type: ignore
        elif val_option == 2:
            # k-fold cross-validation; metric entries start at index 2 of
            # cross_validate's result dict (after fit/score times).
            if do_forecast == 0:
                cvs = cross_validate(model, X, y, cv=self.cross_val_var.get(), scoring=skloss)
                for i,j in enumerate(list(cvs.values())[2:]):
                    self.test_metrics_vars[i].set(j.mean())
        elif val_option == 3:
            # Leave-one-out style CV (n-1 folds).
            if do_forecast == 0:
                cvs = cross_validate(model, X, y, cv=X.shape[0]-1, scoring=skloss)
                for i,j in enumerate(list(cvs.values())[2:]):
                    self.test_metrics_vars[i].set(j.mean())
    else:
        # --- grid search over ranges built from the optimization widgets ---
        # optimization_parameters rows: 0=epsilon, 1=nu, 2=C, 3=gamma,
        # 4=coef0, 5=degree; each row is a (low, high) widget pair.
        params = {}
        interval = self.interval_var.get()
        # C range is log2-spaced between the entered exponents.
        params["C"] = np.unique(np.logspace(float(self.optimization_parameters[2][0].get()), float(self.optimization_parameters[2][1].get()), interval, base=2))
        if self.model_type_var.get() == 0:
            params["epsilon"] = np.unique(np.linspace(float(self.optimization_parameters[0][0].get()), float(self.optimization_parameters[0][1].get()), interval))
            model = SVR()
        else:
            # Clamp nu into its valid (0, 1] range.
            min_nu = max(0.0001, float(self.optimization_parameters[1][0].get()))
            max_nu = min(1, float(self.optimization_parameters[1][1].get()))
            params["nu"] = np.unique(np.linspace(min_nu, max_nu, interval))
            model = NuSVR()
        if kernel != "linear":
            if gamma_choice == 2:
                params["gamma"] = np.unique(np.logspace(float(self.optimization_parameters[3][0].get()), float(self.optimization_parameters[3][1].get()), interval, base=2))
            elif gamma_choice == 1:
                params["gamma"] = ["auto"]
            else:
                params["gamma"] = ["scale"]
        if kernel == "poly" or kernel == "sigmoid":
            params["coef0"] = np.unique(np.linspace(float(self.optimization_parameters[4][0].get()), float(self.optimization_parameters[4][1].get()), interval))
        if kernel == "poly":
            params["degree"] = np.unique(np.linspace(float(self.optimization_parameters[5][0].get()), float(self.optimization_parameters[5][1].get()), interval, dtype=int))
        params["kernel"] = [kernel]
        cv = self.gs_cross_val_var.get() if self.gs_cross_val_option.get() == 1 else None
        regressor = GridSearchCV(model, params, cv=cv)
        if val_option == 0:
            regressor.fit(X, y)
            if do_forecast == 0:
                pred = regressor.predict(X)
                if self.scale_var.get() != "None":
                    pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                    y = self.label_scaler.inverse_transform(y.reshape(-1,1)).reshape(-1) # type: ignore
                losses = loss(y, pred)[:-1]
                self.y_test = y
                self.pred = pred
                for i,j in enumerate(losses):
                    self.test_metrics_vars[i].set(j)
            self.model = regressor.best_estimator_
        elif val_option == 1:
            if do_forecast == 0:
                X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=self.random_percent_var.get()/100)
                regressor.fit(X_train, y_train)
                pred = regressor.predict(X_test)
                if self.scale_var.get() != "None":
                    pred = self.label_scaler.inverse_transform(pred.reshape(-1,1)).reshape(-1) # type: ignore
                    y_test = self.label_scaler.inverse_transform(y_test.reshape(-1,1)).reshape(-1) # type: ignore
                losses = loss(y_test, pred)[:-1]
                self.y_test = y_test
                self.pred = pred
                for i,j in enumerate(losses):
                    self.test_metrics_vars[i].set(j)
            else:
                size = int((self.random_percent_var.get()/100)*len(X))
                X = X[-size:]
                y = y[-size:]
                regressor.fit(X, y)
            self.model = regressor.best_estimator_
        popupmsg("Best Params: " + str(self.model.get_params()))
def _fit_transform(self, Xtr, Ytr, n_drop = 100, regression_method = 'linear', regression_parameters = None, embedding = 'identity', n_dim = 3, embedding_parameters = None):
    """Fit the reservoir readout (and optional state embedding) on training data.

    Parameters
    ----------
    Xtr, Ytr : 2-D arrays
        Training inputs (n_data, dim_data) and targets (n_data, dim_output) —
        shapes assumed from the unpacking below.
    n_drop : int
        Number of initial transient states discarded before fitting.
    regression_method : str
        One of 'nusvr', 'linsvr', 'enet', 'ridge', 'lasso', 'bayeridge',
        'gpr'; anything else falls back to plain linear regression.
    regression_parameters : tuple/scalar or None
        Method-specific hyper-parameters (see each branch).
    embedding : str
        Dimensionality-reduction method for the states; 'identity' keeps
        the raw reservoir states.
    n_dim : int
        Target embedding dimensionality (ignored for 'identity').
    embedding_parameters : scalar or None
        Method-specific embedding hyper-parameter (e.g. KPCA gamma).

    Returns
    -------
    (states, embedded_states) produced by ``self._compute_state_matrix``.
    """
    n_data, dim_data = Xtr.shape
    _, dim_output = Ytr.shape
    self._dim_output = dim_output

    # If this is the first time the network is tuned, set the input and
    # feedback weights: dense and uniformly distributed in [-1.0, 1.0).
    if self._input_weights is None:
        self._input_weights = 2.0*np.random.rand(self._n_internal_units, dim_data) - 1.0
    if self._feedback_weights is None:
        self._feedback_weights = 2.0*np.random.rand(self._n_internal_units, dim_output) - 1.0

    # Initialize regression method
    if regression_method == 'nusvr':
        # NuSVR, RBF kernel
        C, nu, gamma = regression_parameters
        self._regression_method = NuSVR(C = C, nu = nu, gamma = gamma)
    elif regression_method == 'linsvr':
        # Linear SVR
        C, epsilon = regression_parameters
        self._regression_method = LinearSVR(C = C, epsilon = epsilon)
    elif regression_method == 'enet':
        # Elastic net
        alpha, l1_ratio = regression_parameters
        self._regression_method = ElasticNet(alpha = alpha, l1_ratio = l1_ratio)
    elif regression_method == 'ridge':
        # Ridge regression
        self._regression_method = Ridge(alpha = regression_parameters)
    elif regression_method == 'lasso':
        # LASSO
        self._regression_method = Lasso(alpha = regression_parameters)
    elif regression_method == 'bayeridge':
        # Bayesian ridge regression
        lambda_1, lambda_2, alpha_1, alpha_2 = regression_parameters
        self._regression_method = BayesianRidge(lambda_1=lambda_1,lambda_2=lambda_2,alpha_1=alpha_1,alpha_2=alpha_2)
    elif regression_method == 'gpr':
        # Gaussian process regression
        self._regression_method = GaussianProcessRegressor()
    else:
        # Use canonical linear regression
        self._regression_method = LinearRegression()

    # Initialize embedding method. For 'identity' the embedded states equal
    # the raw states, so no embedding estimator is created.
    if embedding == 'identity':
        self._embedding_dimensions = self._n_internal_units
    else:
        self._embedding_dimensions = n_dim
        if embedding == 'kpca':
            # Kernel PCA with RBF kernel
            self._embedding_method = KernelPCA(n_components = n_dim, kernel = 'rbf', gamma = embedding_parameters)
        elif embedding == 'pca':
            # PCA
            self._embedding_method = PCA(n_components = n_dim)
        elif embedding == 'fa':
            # Factor analysis (comment previously said "ICA" — fixed)
            self._embedding_method = FactorAnalysis(n_components = n_dim)
        elif embedding == 'spca':
            # Sparse PCA
            self._embedding_method = SparsePCA(n_components = n_dim, alpha = embedding_parameters)
        elif embedding == 'ipca':
            # Incremental PCA
            self._embedding_method = IncrementalPCA(n_components = n_dim)
        elif embedding == 'tsvd':
            # Truncated SVD; n_components must be < n_features.
            if n_dim >= self._n_internal_units:
                self._embedding_method = TruncatedSVD(n_components = self._n_internal_units-1)
            else:
                self._embedding_method = TruncatedSVD(n_components = n_dim)
        elif embedding == 'wpca':
            # Weighted PCA
            self._embedding_method = WPCA(n_components=n_dim)
        elif embedding == 'rpca':
            # Robust PCA
            self._embedding_method = RobustPCA.RobustPCA()
        elif embedding == 'tga':
            # Trimmed Grassmann Average
            self._embedding_method = tga.TGA(n_components=n_dim, random_state=1)
        elif embedding == 'empca':
            # Expectation Maximization PCA
            self._embedding_method = EMPCA(n_components=n_dim)
        elif embedding == 'mds':
            # Multi-Dimensional Scaling (MDS)
            self._embedding_method = MDS(n_components=n_dim)
        elif embedding == 'ica':
            # FastICA. NOTE(review): embedding_parameters was bound to an
            # unused `alpha` local here — removed; it was never passed on.
            self._embedding_method = FastICA.FastICA(n_components=n_dim)
        else:
            # BUG FIX: was `raise(ValueError, "...")`, which raises a tuple
            # and produces a TypeError in Python 3 instead of a ValueError.
            raise ValueError("Unknown embedding method")

    # Calculate states/embedded states.
    # Note: If the embedding is 'identity', embedded states will be equal to the states.
    states, embedded_states,_ = self._compute_state_matrix(X = Xtr, Y = Ytr, n_drop = n_drop)

    # Train output.
    # NOTE(review): _regression_method is always an estimator object here, so
    # this string comparison can only be true if 'rvr' is assigned elsewhere
    # — confirm; the branch also shells out with hardcoded absolute paths.
    if self._regression_method == 'rvr':
        np.savetxt('/home/minh/Desktop/vb_linear/input_rvr', np.concatenate((embedded_states, self._scaleshift(Xtr[n_drop:,:], self._input_scaling, self._input_shift)), axis=1),delimiter=',')
        np.savetxt('/home/minh/Desktop/vb_linear/output_rvr', self._scaleshift(Ytr[n_drop:,:], self._teacher_scaling, self._teacher_shift).flatten(),delimiter=',')
        # Security note: shell=True with a fixed command string; fine only
        # because no external input is interpolated.
        subprocess.call("~/PycharmProjects/MultivariateESN/run_rvr.sh",shell=True)
        print('end run_rvr!')
    else:
        self._regression_method.fit(np.concatenate((embedded_states, self._scaleshift(Xtr[n_drop:, :], self._input_scaling, self._input_shift)), axis=1), self._scaleshift(Ytr[n_drop:, :], self._teacher_scaling,self._teacher_shift).flatten())

    return states, embedded_states
min_samples_leaf=5), random_state=13, n_estimators=17), "AdaBoostHousing") build_housing(BayesianRidge(), "BayesianRidgeHousing") build_housing(KNeighborsRegressor(), "KNNHousing", with_kneighbors=True) build_housing( MLPRegressor(activation="tanh", hidden_layer_sizes=(26, ), solver="lbfgs", random_state=13, tol=0.001, max_iter=1000), "MLPHousing") build_housing(SGDRegressor(random_state=13), "SGDHousing") build_housing(SVR(), "SVRHousing") build_housing(LinearSVR(random_state=13), "LinearSVRHousing") build_housing(NuSVR(), "NuSVRHousing") # # Anomaly detection # def build_iforest_housing(iforest, name, **pmml_options): mapper = DataFrameMapper([(housing_X.columns.values, ContinuousDomain())]) pipeline = Pipeline([("mapper", mapper), ("estimator", iforest)]) pipeline.fit(housing_X) pipeline = make_pmml_pipeline(pipeline, housing_X.columns.values) pipeline.configure(**pmml_options) store_pkl(pipeline, name + ".pkl") decisionFunction = DataFrame(pipeline.decision_function(housing_X), columns=["decisionFunction"])
correctedSeries = util.detectAndRemoveOutliers(resampledSeries) # Step 3 - Scale the series correctedSeries = util.scaleSeriesStandard(correctedSeries) # Divide the series into training and testing series trainingSeries, testingSeries = util.splitIntoTrainingAndTestingSeries(correctedSeries, horizon) # Learning Process - Start # Form the feature and target vectors featureVectors, targetVectors = formFeatureAndTargetVectors(trainingSeries) # Fit a model model = NuSVR(kernel="rbf", gamma=1.0, nu=1.0, tol=1e-15) model.fit(featureVectors, targetVectors[:, 0]) # Learning Process - End # Predict for testing data points testingFeatureVectors, testingTargetVectors = formFeatureAndTargetVectors(testingSeries) predictedTrainingOutputData = model.predict(testingFeatureVectors) # Predicted and actual Series actualSeries = testingSeries predictedSeries = pd.Series(data=predictedTrainingOutputData.flatten(), index=testingSeries.index) # Learning Process - End # Step 5 - Descale the series
Lasso(alpha=2), Lasso(alpha=1), Lasso(alpha=0.2), LassoLars(alpha=1), LassoLars(alpha=0.1), LassoLars(alpha=0.01), LassoLars(alpha=0.001), LassoLars(alpha=0.0003), Ridge(alpha=0.01, max_iter=5000), Ridge(alpha=0.001, max_iter=5000), Ridge(alpha=0.0001, max_iter=5000), Ridge(alpha=0.00001, max_iter=5000), Lars(), SVR(gamma='auto'), LinearSVR(max_iter=10000), NuSVR(gamma='auto'), LogisticRegression(solver='lbfgs'), LinearRegression(), KernelRidge() ] model_names = [ "PLS 1-component", "PLS 2-component", "PLS 3-component", "PLS 4-component", "Lasso alpha 5", "Lasso alpha 2", "Lasso alpha 1", "Lasso alpha 0.2", "LassoLars alpha 1", "LassoLars alpha 0.1", "LassoLars alpha 0.01", "LassoLars alpha 0.001", "LassoLars alpha 0.0003", "Ridge alpha 0.01", "Ridge alpha 0.001", "Ridge alpha 0.0001", "Ridge alpha 0.00001", "Lars", "SVR", "LinearSVR", "NuSVR", "LogisticRegression", "LinearRegression", "Kernel Ridge" ]
[LogisticRegression(random_state=42)], [SGDClassifier(**SGD_KWARGS)], [SVC(kernel='linear', random_state=42)], [NuSVC(kernel='linear', random_state=42)], ]) def test_explain_linear_binary(newsgroups_train_binary, clf): assert_explained_weights_linear_classifier(newsgroups_train_binary, clf, binary=True) @pytest.mark.parametrize(['clf'], [ [SVC()], [NuSVC()], [SVR()], [NuSVR()], ]) def test_explain_linear_unsupported_kernels(clf): res = explain_weights(clf) assert 'supported' in res.error @pytest.mark.parametrize(['clf'], [ [SVC(kernel='linear')], [NuSVC(kernel='linear')], ]) def test_explain_linear_unsupported_multiclass(clf, newsgroups_train): docs, y, target_names = newsgroups_train vec = TfidfVectorizer() clf.fit(vec.fit_transform(docs), y) expl = explain_weights(clf, vec=vec)
def all_regressor_models():
    """Fit a battery of sklearn regressors on the module-level split.

    Relies on globals X_train, y_train, X_test, y_test being defined by the
    caller's module. Models whose fit/predict raises (e.g. the MultiTask*
    estimators on 1-D targets) are reported and skipped.

    Returns
    -------
    (metrix, test_acc, names) where metrix is a list of
    (name, r_square, rmse) tuples for every model that fit successfully.
    """
    models = []
    metrix = []
    models.append(('LinearRegression', LinearRegression()))
    models.append(('DecisionTreeRegressor', DecisionTreeRegressor()))
    models.append(('RandomForestRegressor', RandomForestRegressor()))
    models.append(('BaggingRegressor', BaggingRegressor()))
    models.append(('GradientBoostingRegressor', GradientBoostingRegressor()))
    models.append(('AdaBoostRegressor', AdaBoostRegressor()))
    models.append(('SVR', SVR()))
    models.append(('KNeighborsRegressor', KNeighborsRegressor()))
    models.append(('ARDRegression', ARDRegression()))
    models.append(('BayesianRidge', BayesianRidge()))
    models.append(('ElasticNet', ElasticNet()))
    models.append(('ElasticNetCV', ElasticNetCV()))
    models.append(('Lars', Lars()))
    models.append(('LassoCV', LassoCV()))
    models.append(('LassoLars', LassoLars()))
    models.append(('LassoLarsCV', LassoLarsCV()))
    models.append(('MultiTaskElasticNet', MultiTaskElasticNet()))
    models.append(('MultiTaskLasso', MultiTaskLasso()))
    models.append(('MultiTaskLassoCV', MultiTaskLassoCV()))
    models.append(('OrthogonalMatchingPursuit', OrthogonalMatchingPursuit()))
    models.append(('OrthogonalMatchingPursuitCV', OrthogonalMatchingPursuitCV()))
    # NOTE(review): PassiveAggressiveClassifier, GaussianProcessClassifier and
    # RadiusNeighborsClassifier below are classifiers in a regressor sweep —
    # they will typically fail on continuous targets and be skipped.
    models.append(('PassiveAggressiveClassifier', PassiveAggressiveClassifier()))
    models.append(('RANSACRegressor', RANSACRegressor()))
    models.append(('Ridge', Ridge()))
    models.append(('RidgeCV', RidgeCV()))
    models.append(('SGDRegressor', SGDRegressor()))
    models.append(('TheilSenRegressor', TheilSenRegressor()))
    models.append(('TransformedTargetRegressor', TransformedTargetRegressor()))
    models.append(('LinearSVR', LinearSVR()))
    models.append(('NuSVR', NuSVR()))
    models.append(('MLPRegressor', MLPRegressor()))
    models.append(('CCA', CCA()))
    models.append(('PLSRegression', PLSRegression()))
    models.append(('PLSCanonical', PLSCanonical()))
    models.append(('GaussianProcessClassifier', GaussianProcessClassifier()))
    models.append(('GradientBoostingRegressor', GradientBoostingRegressor()))
    models.append(('HistGradientBoostingRegressor', HistGradientBoostingRegressor()))
    estimators = [('lr', RidgeCV()), ('svr', LinearSVR(random_state=42))]
    models.append(('StackingRegressor', StackingRegressor(estimators=estimators,
                   final_estimator=RandomForestRegressor(n_estimators=10, random_state=42))))
    r1 = LinearRegression()
    r2 = RandomForestRegressor(n_estimators=10, random_state=1)
    models.append(('VotingRegressor', VotingRegressor([('lr', r1), ('rf', r2)])))
    models.append(('ExtraTreesRegressor', ExtraTreesRegressor()))
    models.append(('IsotonicRegression', IsotonicRegression()))
    models.append(('KernelRidge', KernelRidge()))
    models.append(('RadiusNeighborsClassifier', RadiusNeighborsClassifier()))

    test_acc = []
    names = []
    for name, model in models:
        try:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            r_square = r2_score(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            test_acc.append(r_square)
            names.append(name)
            metrix.append((name, r_square, rmse))
        except Exception:
            # Was a bare `except:` — that also swallowed KeyboardInterrupt
            # and SystemExit; narrowed to Exception.
            print("Excepton Occured : ", name)
    return metrix, test_acc, names
def test_convert_nusvr_default(self):
    """A default-parameter NuSVR converts to a valid ONNX model."""
    fitted, X = self._fit_binary_classification(NuSVR())
    input_spec = [("input", FloatTensorType([1, X.shape[1]]))]
    onnx_model = convert_sklearn(fitted, "SVR", input_spec)
    self.assertIsNotNone(onnx_model)
    # Round-trip the converted model against the sklearn predictions.
    dump_data_and_model(X, fitted, onnx_model, basename="SklearnRegNuSVR2")