Example #1
    def dump(self, dictionary, path, fitter=None):
        bases = dictionary.shape[1]
        assert (bases < dictionary.shape[0])
        backup_fitter = lm.OrthogonalMatchingPursuit(4)

        if fitter is None:
            fitter = backup_fitter
        coefs = []

        bad_samples = 0
        for f in self.files:
            x = self.load_sample(f).T
            fitter.fit(dictionary, x)
            b = None
            if absmax(fitter.coef_) != 0:
                b = np.copy(fitter.coef_)
            else:
                backup_fitter.fit(dictionary, x)
                if absmax(backup_fitter.coef_) != 0:
                    b = np.copy(backup_fitter.coef_)

            if b is not None:
                coefs.append(b)
            else:
                bad_samples += 1

        X = make_mxe_loadable(dictionary.T.astype(np.float32), 16)
        Y = np.array(coefs).astype(np.float32)
        np.save(path, X)
        np.save(path + " coefficients", Y)
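This method depends on helpers that are not shown in the excerpt (`self.load_sample`, `absmax`, `make_mxe_loadable`). A minimal sketch of what `absmax` plausibly looks like, judging only from how it is used above:

import numpy as np

def absmax(a):
    # Largest absolute value in an array; dump() uses it to check whether
    # a fitter produced any non-zero coefficients for the current sample.
    return np.max(np.abs(a))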
Example #2
def get_regression_models():
    models = [('LR', linear_model.LinearRegression()),
              ('R', linear_model.Ridge()),
              ('Lo', linear_model.Lasso(alpha=.015)),
              ('La', linear_model.Lars(positive=True)),
              ('OMP', linear_model.OrthogonalMatchingPursuit()),
              ('BR', linear_model.BayesianRidge()),
              ('GB',
               ensemble.GradientBoostingRegressor(alpha=0.9,
                                                  criterion='friedman_mse',
                                                  init=None,
                                                  learning_rate=0.1,
                                                  loss='ls',
                                                  max_depth=5,
                                                  max_features=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=.0,
                                                  n_estimators=400,
                                                  presort='auto',
                                                  random_state=None,
                                                  subsample=1.0,
                                                  verbose=0,
                                                  warm_start=False)),
              ('RF', ensemble.RandomForestRegressor()),
              ('AB', ensemble.AdaBoostRegressor())]
    return models
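A hedged usage sketch for the returned list; the imports, `X`, and `y` below are assumptions rather than part of the original snippet:

import numpy as np
from sklearn import model_selection

for name, model in get_regression_models():
    # Score each candidate regressor with 5-fold cross-validation.
    scores = model_selection.cross_val_score(
        model, X, y, scoring="neg_mean_squared_error", cv=5)
    print(name, np.sqrt(-scores.mean()))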
Example #3
def test_sk_OrthogonalMatchingPursuit():
    print("Testing sklearn, OrthogonalMatchingPursuit...")
    mod = linear_model.OrthogonalMatchingPursuit()
    X, y = iris_data
    mod.fit(X, y)
    docs = {'name': "OrthogonalMatchingPursuit test"}
    fv = X[0, :]
    upload(mod, fv, docs)
Example #4
 def orthogonalMatchingPursuit(self):
     omp = linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs=10)
     omp.fit(self.training_order_start_end_districts_and_time,
             self.training_number_of_orders)
     predicted_number_of_orders = omp.predict(
         self.testing_order_start_end_districts_and_time)
     current_ride_prediction_error = numpy.mean(
         (predicted_number_of_orders - self.testing_number_of_orders)**2)
     print(current_ride_prediction_error)
     print(omp.coef_)
Example #5
    def _train(self):
        x = self._train_set.features
        y = self._train_set.outputs

        self._transform = preprocessing.PolynomialFeatures(3)

        clf = linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs=235,
                                                     fit_intercept=True)
        clf.fit(self._transform.fit_transform(x, y), y)

        self._model = clf.predict
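Note that `self._model` stores only `clf.predict`, so new samples must be passed through the same polynomial expansion before prediction. A hedged sketch of such a helper (`_predict` is hypothetical, not part of the original class):

    def _predict(self, x):
        # Apply the stored PolynomialFeatures transform, then call the
        # fitted OMP predictor kept in self._model.
        return self._model(self._transform.transform(x))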
Example #6
    def __init__(self, method, params, i=0):
        self.algorithm_list = [
            'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
            'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR', 'GBR'
        ]
        self.method = method
        self.outliers = None
        self.ransac = False

        #print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)

        if self.method[i] == 'LASSO':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lasso(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            self.model = linear.ElasticNet(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'BRR':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lars(**params_temp)

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])
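A hedged usage sketch: only `__init__` is shown above, so `RegressionModel` is a placeholder name for the enclosing class, and the data arrays are assumed:

reg = RegressionModel(method=['OMP'], params=[{'n_nonzero_coefs': 5}])
reg.model.fit(x_train, y_train)       # self.model is the wrapped sklearn estimator
y_pred = reg.model.predict(x_test)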
Example #7
 def test_model_orthogonal_matching_pursuit(self):
     model, X = fit_regression_model(
         linear_model.OrthogonalMatchingPursuit())
     model_onnx = convert_sklearn(
         model,
         "orthogonal matching pursuit",
         [("input", FloatTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(X,
                         model,
                         model_onnx,
                         verbose=False,
                         basename="SklearnOrthogonalMatchingPursuit-Dec4")
Example #8
def sklearn_liner_model_regressions(xTrain, xTest, yTrain, yTest):
    modelForConsideration: DataFrame = pd.DataFrame()
    LinerModels = \
        [
            linear_model.ARDRegression(), linear_model.BayesianRidge(), linear_model.ElasticNet(),
            linear_model.ElasticNetCV(),
            linear_model.HuberRegressor(), linear_model.Lars(), linear_model.LarsCV(), linear_model.Lasso(),
            linear_model.LassoCV(), linear_model.LassoLars(), linear_model.LassoLarsCV(), linear_model.LassoLarsIC(),
            linear_model.LinearRegression(), linear_model.MultiTaskLasso(),
            linear_model.MultiTaskElasticNet(), linear_model.MultiTaskLassoCV(), linear_model.MultiTaskElasticNetCV(),
            linear_model.OrthogonalMatchingPursuit(),
            linear_model.OrthogonalMatchingPursuitCV(), linear_model.PassiveAggressiveClassifier(),
            linear_model.PassiveAggressiveRegressor(), linear_model.Perceptron(),
            linear_model.RANSACRegressor(), linear_model.Ridge(), linear_model.RidgeClassifier(),
            linear_model.RidgeClassifierCV(),
            linear_model.RidgeCV(), linear_model.SGDClassifier(), linear_model.SGDRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.enet_path(xTrain, yTrain),
            linear_model.lars_path(xTrain, yTrain), linear_model.lasso_path(xTrain, yTrain),
            # linear_model.LogisticRegression()
            # ,linear_model.LogisticRegressionCV(),linear_model.logistic_regression_path(xTrain, yTrain), linear_model.orthogonal_mp(xTrain, yTrain), linear_model.orthogonal_mp_gram(), linear_model.ridge_regression()
        ]
    for model in LinerModels:
        modelName: str = model.__class__.__name__
        try:
            # print(f"Preparing Model {modelName}")
            if modelName == "LogisticRegression":
                model = linear_model.LogisticRegression(random_state=0)
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)

            if errorList["Test Average Error"][0] < 30 and errorList[
                    "Train Average Error"][0] < 30:
                try:
                    modelForConsideration = modelForConsideration.append(
                        errorList)
                except (Exception) as e:
                    print(e)

        except (Exception, ArithmeticError) as e:
            print(f"Error occurred while preparing Model {modelName}")
    return modelForConsideration
Example #9
 def test_model_orthogonal_matching_pursuit(self):
     model, X = fit_regression_model(
         linear_model.OrthogonalMatchingPursuit())
     model_onnx = convert_sklearn(
         model, "orthogonal matching pursuit",
         [("input", FloatTensorType([None, X.shape[1]]))])
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(
         X,
         model,
         model_onnx,
         verbose=False,
         basename="SklearnOrthogonalMatchingPursuit-Dec4",
         allow_failure="StrictVersion("
         "onnxruntime.__version__)"
         "<= StrictVersion('0.2.1')",
     )
Example #10
 def dtc08(self):
     # Convert y into 1-D form: self.y_train, self.y_test
     self.y01_train = list()
     self.y01_test = list()
     for a in range(len(self.y_train)):
         self.y01_train.append(self.y_train[a][0])
     for b in range(len(self.y_test)):
         self.y01_test.append(self.y_test[b][0])
     
     if not self.om_edit.text().strip():
         self.om_alpha = None
     else:
         self.om_alpha = float(self.om_edit.text())
     # OMP algorithm implementation
     self.clf_om = linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs = self.om_alpha) 
     self.clf_om.fit(self.x_train, self.y01_train)
     self.y_pred = self.clf_om.predict(self.x_test)
     self.x_pred = self.clf_om.predict(self.x_train)
     # Set values
     self.stab(self.om_table02, self.om_table03)
     self.eetab(self.om_table01)
Example #11
    def choose_model(self, X, y):
        """
        Automatic model chooser.

        :param X: data
        :param y: target

        :type X: ndarray or scipy.sparse matrix, (n_samples, n_features)
        :type y: ndarray, shape (n_samples,) or (n_samples, n_targets)
        """
        #{'linear', 'polynomial', 'logistic', 'logisticcv', 'elasticnet', 'elasticnetcv', 'orthogonal', 'orthogonalcv', 'theil', 'sgd', 'perceptron', 'passive_aggressive'}
        models = {
            'linear': linear_model.LinearRegression(),
            'logistic': linear_model.LogisticRegression(),
            'elasticnet': linear_model.ElasticNet(),
            'orthogonal': linear_model.OrthogonalMatchingPursuit(),
            'theil': linear_model.TheilSenRegressor(),
            'sgd': linear_model.SGDRegressor(),
            'passive_aggressive': linear_model.PassiveAggressiveRegressor()
        }
        scores = {}
        for name, model in models.items():
            scores[name] = []

        sss = StratifiedShuffleSplit(n_splits=10, test_size=0.25)
        for train_index, test_index in sss.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            for name, model in models.items():
                model.fit(X_train, y_train)
                scores[name].append(
                    metrics.mean_squared_error(y_test, model.predict(X_test)))

        #Choose http://blog.minitab.com/blog/adventures-in-statistics-2/how-to-choose-the-best-regression-model
        best_score = float('inf')
        for name, model in models.items():
            mean_score = sum(scores[name]) / len(scores[name])
            if mean_score < best_score:
                best_score = mean_score
                self._model = model
Example #12
def get_stats(path):
    info = pd.read_csv(path)
    info = info.dropna()

    f = info['price'] < 100000
    info = info[f]  # Get information only about flats with price < 100'000

    X = info[['type', 'size', 'locality']].values
    scaler_X = preprocessing.StandardScaler().fit(X)
    X = scaler_X.transform(X)
    y = info['price'].values

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=20)

    estimators = [
        linear_model.LinearRegression(),
        linear_model.Ridge(alpha=0.1),
        linear_model.Lasso(alpha=0.1),
        linear_model.ElasticNet(alpha=0.01, l1_ratio=0.25),
        linear_model.BayesianRidge(n_iter=500),
        linear_model.OrthogonalMatchingPursuit(),
        linear_model.SGDRegressor(max_iter=2500, epsilon=0.01),
        SVR(kernel='rbf', epsilon=0.01, C=20)
    ]

    estimator_values = np.array([])

    for e in estimators:
        e.fit(X_train, y_train)
        this_err = metrics.median_absolute_error(y_test, e.predict(X_test))
        estimator_values = np.append(estimator_values, this_err)

    return estimator_values
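A hedged usage sketch; the CSV path is a placeholder, and the label order simply mirrors the `estimators` list above:

errors = get_stats("flats.csv")
labels = ["OLS", "Ridge", "Lasso", "ElasticNet",
          "BayesianRidge", "OMP", "SGD", "SVR"]
for label, err in zip(labels, errors):
    # Median absolute error of each estimator on the held-out split.
    print(label, err)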
Example #13
    def grid_Search(self):
        '''
		this function is used to evaluate various regression methods
		'''
        rs = 1
        ests = [
            linear_model.LinearRegression(),
            linear_model.Ridge(),
            linear_model.Lasso(),
            linear_model.ElasticNet(),
            linear_model.BayesianRidge(),
            linear_model.OrthogonalMatchingPursuit(),
            ensemble.GradientBoostingRegressor()
        ]
        ests_labels = np.array([
            'Linear', 'Ridge', 'Lasso', 'ElasticNet', 'BayesRidge', 'OMP',
            'GradientBoostRegressor'
        ])
        errvals = np.array([])

        for e in ests:
            e.fit(self.X_train, self.y_train)
            this_err = mean_squared_error(self.y_test, e.predict(self.X_test))
            #print"got error %0.2f" % this_err
            errvals = np.append(errvals, math.sqrt(this_err))

        pos = np.arange(errvals.shape[0])
        srt = np.argsort(errvals)
        plt.figure(figsize=(12, 10))
        plt.bar(pos, errvals[srt], align='center')
        plt.xticks(pos, ests_labels[srt])
        plt.xlabel('Estimator')
        plt.ylabel('Root Mean Square Error')
        plt.show()

        return
Example #14
LINEAR_MODELS = {
    "none":
    None,
    "linear":
    linear_model.LinearRegression(fit_intercept=False),
    "elastic_net":
    linear_model.MultiTaskElasticNet(alpha=0.0001, fit_intercept=False),
    "lasso":
    linear_model.MultiTaskLasso(alpha=0.001, fit_intercept=False),
    "lasso_lars":
    linear_model.LassoLars(alpha=0.0001, fit_intercept=False),
    "lars":
    linear_model.Lars(n_nonzero_coefs=10, fit_intercept=False),
    "matching_pursuit":
    linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs=10,
                                           fit_intercept=False),
    "ridge":
    linear_model.Ridge(alpha=0.1, fit_intercept=False),
}


@pytest.fixture
def samples(joint):
    return joint.sample(1000, rule="sobol")


@pytest.fixture
def evaluations(model_solver, samples):
    return numpy.array([model_solver(sample) for sample in samples.T])

Example #15
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Args:
        P (Poly) : Polynomial expansion with `P.shape=(M,)` and `P.dim=D`.
        x (array_like) : Collocation nodes with `x.shape=(D,K)`.
        u (array_like) : Model evaluations with `len(u)=K`.
        retall (bool) : If True return Fourier coefficients in addition to R.
        rule (str) : Regression method used.

    Returns:
        (Poly, np.ndarray) : Fitted polynomial with `R.shape=u.shape[1:]` and
                `R.dim=D`. The Fourier coefficients in the estimation.

    Examples:
        >>> x, y = cp.variable(2)
        >>> P = cp.Poly([1, x, y])
        >>> s = [[-1,-1,1,1], [-1,1,-1,1]]
        >>> u = [0,1,1,2]
        >>> print(cp.around(cp.fit_regression(P, s, u), 14))
        0.5q0+0.5q1+1.0
    """
    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = linalg.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)
        uhat = uhat.T

    else:

        # Scikit-learn wrapper
        try:
            _ = linear_model
        except:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = linear_model.ARDRegression(fit_intercept=False,
                                                copy_X=False,
                                                **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNetCV(**kws)

        elif rule == "LA":  # success
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = linear_model.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = cp.poly.sum((P * uhat), -1)
    R = cp.poly.reshape(R, shape)

    if retall == 1:
        return R, uhat

    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q

    return R
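A hedged usage note: for the scikit-learn branches the remaining keyword arguments are forwarded unchanged to the estimator, so the OMP rule can be invoked roughly as:

approx = cp.fit_regression(P, s, u, rule="OMP", n_nonzero_coefs=1)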
Example #16
        classification(svm.SVC(kernel="rbf", **SVC_PARAMS)),
        classification(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),

        # Linear Regression
        regression(linear_model.LinearRegression()),
        regression(linear_model.HuberRegressor()),
        regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.BayesianRidge()),
        regression(linear_model.ARDRegression()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),

        # Logistic Regression
        classification(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification(
            linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
Example #17
def train_sr(X_train, X_test, y_train, y_test, common_name_model, problemtype,
             classes, default_features, transform_model, modeldir, settings):

    # metrics
    modeltypes = list()
    explained_variances = list()
    mean_absolute_errors = list()
    mean_squared_errors = list()
    median_absolute_errors = list()
    r2_scores = list()

    print(modeldir)
    os.chdir(modeldir)

    # make a temp folder to dump files into
    foldername = ''
    foldername = common_name_model + '_temp'
    tempdir = os.getcwd() + '/' + foldername

    try:
        os.mkdir(foldername)
        os.chdir(foldername)
    except:
        shutil.rmtree(foldername)
        os.mkdir(foldername)
        os.chdir(foldername)

    # metrics.explained_variance_score(y_true, y_pred)  Explained variance regression score function
    # metrics.mean_absolute_error(y_true, y_pred)   Mean absolute error regression loss
    # metrics.mean_squared_error(y_true, y_pred[, …])   Mean squared error regression loss
    # metrics.mean_squared_log_error(y_true, y_pred)    Mean squared logarithmic error regression loss
    # metrics.median_absolute_error(y_true, y_pred) Median absolute error regression loss
    # metrics.r2_score(y_true, y_pred[, …]) R^2 (coefficient of determination) regression score function.

    ##################################################
    ##               linear regression              ##
    ##################################################
    '''
	LinearRegression fits a linear model with coefficients w = (w_1, ..., w_p)
	to minimize the residual sum of squares between the observed responses
	in the dataset, and the responses predicted by the linear approximation.

	Example:
	http://scikit-learn.org/stable/modules/linear_model.html
	'''
    try:
        ols = linear_model.LinearRegression()
        ols.fit(X_train, y_train)
        #ols.predict(X_test, y_test)
        predictions = cross_val_predict(ols, X_test, y_test, cv=6)
        f = open('ols.pickle', 'wb')
        pickle.dump(ols, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('linear regression')
    except:
        print('error - ORDINARY LEAST SQUARES')

    ##################################################
    ##              Ridge regression                ##
    ##################################################
    '''
	Ridge regression addresses some of the problems of
	Ordinary Least Squares by imposing a penalty on the
	size of coefficients.

	The ridge coefficients minimize a penalized residual sum of squares.

	Example:
	http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge

	'''
    try:
        ridge = linear_model.Ridge(fit_intercept=True,
                                   alpha=0.0,
                                   random_state=0,
                                   normalize=True)
        ridge.fit(X_train, y_train)
        predictions = cross_val_predict(ridge, X_test, y_test, cv=6)
        f = open('ridge.pickle', 'wb')
        pickle.dump(ridge, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('ridge regression')
    except:
        print('error - RIDGE REGRESSION')

    ##################################################
    ##                    LASSO                     ##
    ##################################################
    '''
	The Lasso is a linear model that estimates sparse coefficients.
	It is useful in some contexts due to its tendency to prefer solutions
	with fewer parameter values, effectively reducing the number of
	variables upon which the given solution is dependent.

	For this reason, the Lasso and its variants are fundamental
	to the field of compressed sensing. Under certain conditions,
	it can recover the exact set of non-zero weights
	(see Compressive sensing: tomography reconstruction with L1 prior (Lasso)).

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_model_selection.html#sphx-glr-auto-examples-linear-model-plot-lasso-model-selection-py

	'''
    try:
        lasso = linear_model.Lasso(alpha=0.1)
        lasso.fit(X_train, y_train)
        predictions = cross_val_predict(lasso, X_test, y_test, cv=6)
        f = open('lasso.pickle', 'wb')
        pickle.dump(lasso, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('LASSO')
    except:
        print('error - LASSO')

    ##################################################
    ##              Multi-task LASSO                ##
    ##################################################
    '''
	The MultiTaskLasso is a linear model that estimates
	sparse coefficients for multiple regression problems
	jointly: y is a 2D array, of shape (n_samples, n_tasks).
	The constraint is that the selected features are the same
	for all the regression problems, also called tasks.

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_multi_task_lasso_support.html#sphx-glr-auto-examples-linear-model-plot-multi-task-lasso-support-py

	'''
    # # ONLY WORKS ON y_train that is multidimensional (one hot encoded)
    # # Generate some 2D coefficients with sine waves with random frequency and phase
    # mlasso = linear_model.MultiTaskLasso(alpha=0.1)
    # mlasso.fit(X_train, y_train)
    # predictions = cross_val_predict(mlasso, X_test, y_test, cv=6)
    # accuracy = metrics.r2_score(y_test, predictions)

    ##################################################
    ##                  Elastic net                 ##
    ##################################################
    '''
	ElasticNet is a linear regression model trained with L1 and L2 prior as regularizer.
	This combination allows for learning a sparse model where few of the weights are non-zero
	like Lasso, while still maintaining the regularization properties of Ridge.

	We control the convex combination of L1 and L2 using the l1_ratio parameter.

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_and_elasticnet.html#sphx-glr-auto-examples-linear-model-plot-lasso-and-elasticnet-py

	'''
    # need training data
    try:
        enet = linear_model.ElasticNet()
        enet.fit(X_train, y_train)
        predictions = cross_val_predict(enet, X_test, y_test, cv=6)
        f = open('enet.pickle', 'wb')
        pickle.dump(enet, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('elastic net')
    except:
        print('error - ELASTIC NET')

    ##################################################
    ##            Multi-task elastic net            ##
    ##################################################
    '''
	The MultiTaskElasticNet is an elastic-net model that estimates sparse coefficients
	for multiple regression problems jointly: Y is a 2D array, of shape (n_samples, n_tasks).

	The constraint is that the selected features are the same for all the regression problems,
	also called tasks.

	Example:
	http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.MultiTaskElasticNet.html
	'''
    # # # ONLY WORKS ON y_train that is multidimensional (one hot encoded)
    # clf = linear_model.MultiTaskElasticNet()
    # clf.fit(X_train, y_train)
    # #print(clf.coef_)
    # #print(clf.intercept_)

    ##################################################
    ##          Least angle regression (LARS)       ##
    ##################################################
    '''
	The advantages of LARS are:

	-> It is numerically efficient in contexts where p >> n (i.e., when the number of dimensions is significantly greater than the number of points)
	-> It is computationally just as fast as forward selection and has the same order of complexity as an ordinary least squares.
	-> It produces a full piecewise linear solution path, which is useful in cross-validation or similar attempts to tune the model.
	-> If two variables are almost equally correlated with the response, then their coefficients should increase at approximately the same rate. The algorithm thus behaves as intuition would expect, and also is more stable.
	-> It is easily modified to produce solutions for other estimators, like the Lasso.

	The disadvantages of the LARS method include:

	-> Because LARS is based upon an iterative refitting of the residuals,
	-> it would appear to be especially sensitive to the effects of noise.

	Example:
	http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lars.html
	'''
    try:
        lars = linear_model.Lars(n_nonzero_coefs=1)
        lars.fit(X_train, y_train)
        predictions = cross_val_predict(lars, X_test, y_test, cv=6)
        f = open('lars.pickle', 'wb')
        pickle.dump(lars, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('Least angle regression (LARS)')
    except:
        print('error - LARS')

    ##################################################
    ##                 LARS LASSO                   ##
    ##################################################
    '''
	LassoLars is a lasso model implemented using the LARS algorithm,
	and unlike the implementation based on coordinate_descent,
	this yields the exact solution, which is piecewise linear
	as a function of the norm of its coefficients.

	Example:
	http://scikit-learn.org/stable/modules/linear_model.html#passive-aggressive-algorithms

	'''
    try:
        lars_lasso = linear_model.LassoLars()
        lars_lasso.fit(X_train, y_train)
        predictions = cross_val_predict(lars_lasso, X_test, y_test, cv=6)
        f = open('lars_lasso.pickle', 'wb')
        pickle.dump(lars_lasso, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('LARS lasso')
    except:
        print('error - LARS LASSO')

    ##################################################
    ##      Orthogonal Matching Pursuit (OMP)       ##
    ##################################################
    '''
	OrthogonalMatchingPursuit and orthogonal_mp implements the OMP
	algorithm for approximating the fit of a linear model with
	constraints imposed on the number of non-zero coefficients (ie. the L 0 pseudo-norm).

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_omp.html#sphx-glr-auto-examples-linear-model-plot-omp-py
	'''
    try:
        omp = linear_model.OrthogonalMatchingPursuit()
        omp.fit(X_train, y_train)
        predictions = cross_val_predict(omp, X_test, y_test, cv=6)
        f = open('omp.pickle', 'wb')
        pickle.dump(omp, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('orthogonal matching pursuit (OMP)')
    except:
        print('error - ORTHOGONAL MATCHING PURSUIT (OMP)')

    ##################################################
    ##          Bayesian ridge regression           ##
    ##################################################
    '''
	The advantages of Bayesian Regression are:

	-> It adapts to the data at hand.
	-> It can be used to include regularization parameters in the estimation procedure.

	The disadvantages of Bayesian regression include:

	-> Inference of the model can be time consuming.

	Example:
	http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.BayesianRidge.html
	'''
    # MULTI-DIMENSIONAL
    # clf = BayesianRidge()
    # clf.fit(X_train, y_train)
    # predictions = cross_val_predict(clf, X_test, y_test, cv=6)
    # accuracy = metrics.r2_score(y_test, predictions)

    ##################################################
    ##      Automatic relevance determination       ##
    ##################################################
    '''
	ARDRegression is very similar to Bayesian Ridge Regression,
	but can lead to sparser weights w [1] [2]. ARDRegression poses
	a different prior over w, by dropping the assumption of
	the Gaussian being spherical.

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_ard.html#sphx-glr-auto-examples-linear-model-plot-ard-py
	'''
    # MULTI-DIMENSIONAL
    # clf = ARDRegression(compute_score=True)
    # clf.fit(X_train, y_train)
    # predictions = cross_val_predict(clf, X_test, y_test, cv=6)
    # accuracy = metrics.r2_score(y_test, predictions)

    ##################################################
    ##              Logistic regression             ##
    ##################################################
    '''
	Logistic regression, despite its name, is a linear model
	for classification rather than regression. Logistic regression
	is also known in the literature as logit regression,
	maximum-entropy classification (MaxEnt) or the log-linear classifier.

	In this model, the probabilities describing the possible outcomes
	of a single trial are modeled using a logistic function.

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_logistic_l1_l2_sparsity.html#sphx-glr-auto-examples-linear-model-plot-logistic-l1-l2-sparsity-py
	'''
    try:
        lr = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
        lr.fit(X_train, y_train)
        predictions = cross_val_predict(lr, X_test, y_test, cv=6)
        f = open('lr.pickle', 'wb')
        pickle.dump(lr, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('logistic regression')
    except:
        print('error - LOGISTIC REGRESSION')

    ##################################################
    ##      Stochastic gradient descent (SGD)       ##
    ##################################################
    '''
	Stochastic gradient descent is a simple yet very efficient
	approach to fit linear models. It is particularly useful
	when the number of samples (and the number of features) is very large.
	The partial_fit method allows online/out-of-core learning.

	The classes SGDClassifier and SGDRegressor provide functionality
	to fit linear models for classification and regression using
	different (convex) loss functions and different penalties.
	E.g., with loss="log", SGDClassifier fits a logistic regression model,
	while with loss="hinge" it fits a linear support vector machine (SVM).

	Example:
	http://scikit-learn.org/stable/modules/sgd.html#sgd
	'''
    try:
        # note you have to scale the data, as SGD algorithms are sensitive to
        # feature scaling
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train_2 = scaler.transform(X_train)
        X_test_2 = scaler.transform(X_test)
        sgd = linear_model.SGDRegressor()
        sgd.fit(X_train_2, y_train)
        predictions = cross_val_predict(sgd, X_test_2, y_test, cv=6)
        f = open('sgd.pickle', 'wb')
        pickle.dump(sgd, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('stochastic gradient descent (SGD)')
    except:
        print('error - STOCHASTIC GRADIENT DESCENT')

    ##################################################
    ##          Perceptron algorithms               ##
    ##################################################
    '''
	Multi-layer Perceptron is sensitive to feature scaling,
	so it is highly recommended to scale your data.
	For example, scale each attribute on the input vector X to [0, 1] or [-1, +1],
	or standardize it to have mean 0 and variance 1.

	Note that you must apply the same scaling to the test
	set for meaningful results. You can use StandardScaler for standardization.

	Change the solver to 'lbfgs'. The default 'adam' is an SGD-like method,
	which is effective for large & messy data but pretty useless for this kind of smooth & small data.

	Example:
	http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveRegressor.html#sklearn.linear_model.PassiveAggressiveRegressor
	'''
    try:
        nn = MLPRegressor(solver='lbfgs')
        nn.fit(X_train, y_train)
        predictions = cross_val_predict(nn, X_test, y_test, cv=6)
        f = open('nn.pickle', 'wb')
        pickle.dump(nn, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('perceptron')
    except:
        print('error - MLP REGRESSOR')

    ##################################################
    ##          Passive-aggressive algorithms       ##
    ##################################################
    '''
	The passive-aggressive algorithms are a family of algorithms
	for large-scale learning. They are similar to the Perceptron
	in that they do not require a learning rate. However,
	contrary to the Perceptron, they include a regularization parameter C.

	Example:
	http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf
	'''
    try:
        pa_regr = linear_model.PassiveAggressiveRegressor(random_state=0)
        pa_regr.fit(X_train, y_train)
        predictions = cross_val_predict(pa_regr, X_test, y_test, cv=6)
        f = open('pa_regr.pickle', 'wb')
        pickle.dump(pa_regr, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('passive-aggressive algorithm')
    except:
        print('error - PASSIVE-AGGRESSIVE')

    ##################################################
    ##                   RANSAC                     ##
    ##################################################
    '''
	When in doubt, use RANSAC

	RANSAC (RANdom SAmple Consensus) fits a model from random subsets of
	inliers from the complete data set.

	RANSAC is a non-deterministic algorithm producing only a reasonable
	result with a certain probability, which is dependent on the number
	of iterations (see max_trials parameter). It is typically used for
	linear and non-linear regression problems and is especially popular
	in the fields of photogrammetric computer vision.

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_ransac.html#sphx-glr-auto-examples-linear-model-plot-ransac-py
	'''
    try:
        ransac = linear_model.RANSACRegressor()
        ransac.fit(X_train, y_train)
        predictions = cross_val_predict(ransac, X_test, y_test, cv=6)
        f = open('ransac.pickle', 'wb')
        pickle.dump(ransac, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('RANSAC')
    except:
        print('error - RANSAC')

    ##################################################
    ##              Theil-SEN                       ##
    ##################################################
    '''
	The TheilSenRegressor estimator uses a generalization of the median
	in multiple dimensions. It is thus robust to multivariate outliers.

	Note however that the robustness of the estimator decreases quickly
	with the dimensionality of the problem. It loses its robustness
	properties and becomes no better than an ordinary least squares
	in high dimension.

	Note takes a bit longer to train.

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_theilsen.html#sphx-glr-auto-examples-linear-model-plot-theilsen-py

	'''
    try:
        theilsen = linear_model.TheilSenRegressor(random_state=42)
        theilsen.fit(X_train, y_train)
        predictions = cross_val_predict(theilsen, X_test, y_test, cv=6)
        f = open('theilsen.pickle', 'wb')
        pickle.dump(theilsen, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('Theil-Sen')
    except:
        print('error - THEILSEN')

    ##################################################
    ##              Huber Regression                ##
    ##################################################
    '''
	The HuberRegressor is different to Ridge because it applies a linear loss
	to samples that are classified as outliers. A sample is classified as an
	inlier if the absolute error of that sample is lesser than a certain threshold.

	It differs from TheilSenRegressor and RANSACRegressor because it does not
	ignore the effect of the outliers but gives a lesser weight to them.

	Example:
	http://scikit-learn.org/stable/auto_examples/linear_model/plot_huber_vs_ridge.html#sphx-glr-auto-examples-linear-model-plot-huber-vs-ridge-py
	'''
    try:
        huber = linear_model.HuberRegressor(fit_intercept=True,
                                            alpha=0.0,
                                            max_iter=100)
        huber.fit(X_train, y_train)
        predictions = cross_val_predict(huber, X_test, y_test, cv=6)
        f = open('huber.pickle', 'wb')
        pickle.dump(huber, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('huber regression')
    except:
        print('error - HUBER')

    ##################################################
    ##              Polynomial Regression           ##
    ##################################################
    '''
	One common pattern within machine learning is to use linear models trained on
	nonlinear functions of the data. This approach maintains the generally fast
	performance of linear methods, while allowing them to fit a much wider range of data.

	Example:
	http://scikit-learn.org/stable/modules/linear_model.html#passive-aggressive-algorithms

	'''
    try:
        poly_lr = Pipeline([('poly',
                             PolynomialFeatures(degree=5, include_bias=False)),
                            ('linreg', LinearRegression(normalize=True))])

        poly_lr.fit(X_train, y_train)
        predictions = cross_val_predict(poly_lr, X_test, y_test, cv=6)
        accuracy = metrics.r2_score(y_test, predictions)
        f = open('poly_lr.pickle', 'wb')
        pickle.dump(poly_lr, f)
        f.close()
        # get stats
        explained_variances, mean_absolute_errors, mean_squared_errors, median_absolute_errors, r2_scores = update_list(
            y_test, predictions, explained_variances, mean_absolute_errors,
            mean_squared_errors, median_absolute_errors, r2_scores)
        modeltypes.append('polynomial (linear regression)')
    except:
        print('error - POLYNOMIAL')

    ##################################################
    ##              Write session to .JSON          ##
    ##################################################

    os.chdir(modeldir)

    print('\n\n')
    print('RESULTS: \n')

    # print table in terminal
    table = BeautifulTable()
    table.column_headers = ["model type", "R^2 score", "Mean Absolute Errors"]
    print(len(modeltypes))
    print(len(r2_scores))
    print(len(mean_absolute_errors))

    for i in range(len(modeltypes)):
        table.append_row(
            [modeltypes[i],
             str(r2_scores[i]),
             str(mean_absolute_errors[i])])

    print(table)

    filename = common_name_model + '.xlsx'
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet()

    worksheet.write('A1', 'Model type')
    worksheet.write('B1', 'R^2 score')
    worksheet.write('C1', 'Explained Variances')
    worksheet.write('D1', 'Mean Absolute Errors')
    worksheet.write('E1', 'Mean Squared Log Errors')
    worksheet.write('F1', 'Median Absolute Errors')
    #worksheet.write('G1', 'Mean Squared Errors')

    # print the best model in terms of mean absolute error
    varnames = [
        'ols.pickle', 'ridge.pickle', 'lasso.pickle', 'enet.pickle',
        'lars.pickle', 'lars_lasso.pickle', 'omp.pickle', 'lr.pickle',
        'sgd.pickle', 'nn.pickle', 'pa_regr.pickle', 'ransac.pickle',
        'theilsen.pickle', 'huber.pickle', 'poly_lr.pickle'
    ]

    # make sure all numbers, make mae 10 (a large number, to eliminate it from the list of options)
    mae = mean_absolute_errors
    for i in range(len(mae)):
        if mae[i] == 'n/a':
            mae[i] = 10
        else:
            mae[i] = float(mae[i])

    # get minimum index and now delete temp folder, put master file in models directory
    minval = np.amin(mae)
    ind = mae.index(minval)
    print('%s has the lowest mean absolute error (%s)' %
          (modeltypes[ind], str(minval)))
    # rename file
    os.chdir(tempdir)
    newname = common_name_model + '.pickle'
    print('saving file to disk (%s)...' % (newname))
    os.rename(varnames[ind], newname)
    # move to models directory
    shutil.copy(os.getcwd() + '/' + newname, modeldir + '/' + newname)
    # now delete temp folder
    os.chdir(modeldir)
    shutil.rmtree(foldername)

    # output spreadsheet of results and open up for analysis
    for i in range(len(modeltypes)):
        try:
            worksheet.write('A' + str(i + 2), str(modeltypes[i]))
            worksheet.write('B' + str(i + 2), str(r2_scores[i]))
            worksheet.write('C' + str(i + 2), str(explained_variances[i]))
            worksheet.write('D' + str(i + 2), str(mean_absolute_errors[i]))
            worksheet.write('F' + str(i + 2), str(median_absolute_errors[i]))
            #worksheet.write('G'+str(i+2), str(mean_squared_errors[i]))

        except:
            pass

    workbook.close()

    files = list()
    files.append(common_name_model + '.xlsx')
    files.append(common_name_model + '.pickle')

    model_name = common_name_model + '.pickle'
    model_dir = os.getcwd()

    return model_name, model_dir, files
Example #18
class EnsembleRegressor(BaseEstimator, MetaEstimatorMixin, RegressorMixin):
    # Static member variables
    _ensemble_regressors_auto = (
        linear_model.LinearRegression(fit_intercept=True),
        Pipeline([('poly', PolynomialFeatures(degree=2)),
                  ('linear',
                   linear_model.LinearRegression(fit_intercept=False))]),
        KernelRegression(kernel='poly'),
        DecisionTreeRegressor(max_depth=4),
        DecisionTreeRegressor(max_depth=None),
        RandomForestRegressor(n_estimators=100),
    )

    _ensemble_possible_regressors = (
        linear_model.LinearRegression(fit_intercept=True),
        Pipeline([('poly', PolynomialFeatures(degree=2)),
                  ('linear',
                   linear_model.LinearRegression(fit_intercept=False))]),
        # # linear_model.Ridge(alpha=4, fit_intercept=True),
        KernelRegression(kernel='poly'),
        # linear_model.RidgeCV(alphas=[.01, .1, .3, .5, 1], fit_intercept=True),
        # # linear_model.Lasso(alpha=4, fit_intercept=True),
        # linear_model.LassoCV(n_alphas=100, fit_intercept=True, max_iter=5000),
        # linear_model.ElasticNet(alpha=1),
        # linear_model.ElasticNetCV(n_alphas=100, l1_ratio=.5),
        # linear_model.OrthogonalMatchingPursuit(),
        # linear_model.BayesianRidge(),
        # # linear_model.ARDRegression(),
        # linear_model.SGDRegressor(),
        # # linear_model.PassiveAggressiveRegressor(loss='squared_epsilon_insensitive'),
        # linear_model.RANSACRegressor(),
        # LinearSVR(max_iter=1e4, fit_intercept=True, loss='squared_epsilon_insensitive', C=0.5),
        # SVR(max_iter=1e4, kernel='poly', C=1, degree=4),
        # SVR(max_iter=1e4, kernel='rbf', C=1, gamma=0.1),
        # SVR(kernel='linear', C=1),
        # SVR(kernel='linear', C=0.5),
        # SVR(kernel='linear', C=0.1),
        # DecisionTreeRegressor(max_depth=5),
        DecisionTreeRegressor(max_depth=4),
        DecisionTreeRegressor(max_depth=None),
        RandomForestRegressor(n_estimators=100),
        # AdaBoostRegressor(learning_rate=0.9, loss='square'),
        # BaggingRegressor(),
        MLPRegressor())

    _ensemble_nn = [MLPRegressor(nb_epoch=1000) for _ in range(5)
                    ]  # 5 Multi Layer Perceptrons in the ensemble

    _ensemble_nn_large = [MLPRegressor(nb_epoch=500) for _ in range(10)
                          ]  # 10 Multi Layer Perceptrons in the ensemble

    _ensemble_ridge_regression = [
        linear_model.Ridge(alpha=alpha, fit_intercept=True, normalize=True)
        for alpha in np.arange(.1, 1, .2)
    ]  # 5 Ridge Regressors

    _ensemble_auto_large = (
        linear_model.LinearRegression(fit_intercept=True),
        Pipeline([('poly', PolynomialFeatures(degree=2)),
                  ('linear',
                   linear_model.LinearRegression(fit_intercept=False))]),
        linear_model.Ridge(alpha=0.5, fit_intercept=True, normalize=True),
        KernelRegression(kernel='poly'),
        # linear_model.RidgeCV(alphas=[.01, .1, .3, .5, 1], fit_intercept=True),
        linear_model.Lasso(alpha=0.1, fit_intercept=True),
        # linear_model.LassoCV(n_alphas=100, fit_intercept=True, max_iter=5000),
        # linear_model.ElasticNet(alpha=1),
        # linear_model.ElasticNetCV(n_alphas=100, l1_ratio=.5),
        linear_model.OrthogonalMatchingPursuit(),
        # linear_model.BayesianRidge(),
        # # linear_model.ARDRegression(),
        # linear_model.SGDRegressor(),
        # linear_model.PassiveAggressiveRegressor(loss='squared_epsilon_insensitive'),
        # linear_model.RANSACRegressor(),
        LinearSVR(max_iter=1e3,
                  fit_intercept=True,
                  loss='squared_epsilon_insensitive',
                  C=1),
        SVR(max_iter=1e3, kernel='poly', C=1, degree=3),
        SVR(max_iter=1e3, kernel='rbf', C=1),
        SVR(max_iter=1e3, kernel='sigmoid', C=1),
        # SVR(kernel='linear', C=1),
        # SVR(kernel='linear', C=0.5),
        # SVR(kernel='linear', C=0.1),
        # DecisionTreeRegressor(max_depth=5),
        DecisionTreeRegressor(max_depth=4),
        DecisionTreeRegressor(max_depth=None),
        RandomForestRegressor(n_estimators=100),
        AdaBoostRegressor(learning_rate=0.9, loss='square'),
        BaggingRegressor(),
        # MLPRegressor(num_hidden_units=5)
    )

    # self._ensemble_nn = [MLPRegressor(num_hidden_units=(i+6), nb_epoch=(i+6)*100) for i in range(5)]  # 5 Multi Layer Perceptrons in the ensemble

    def __init__(self, type='auto', verbose=False):
        '''
        :param type: Possible values: 'auto', 'mlp', 'mlp_large', 'ridge', 'auto_large' (defaults to 'auto').
                     Choice of set of regressors, 'auto' will use various standard regressors (usually linear
                     regression, NW-kernel, decision trees and random forests, but subject to change).
                     'mlp' will use 5 Multi-Layer Perceptrons, each with 10 hidden units, batch_size=32 and 1000 epochs.
                     'mlp_large' will use 10 MLPs, each with 10 hidden units, batch_size=32 and only 500 epochs.
                     'ridge' will train 5 ridge regressors with different alphas.
        :param verbose:
        '''
        self._verbose = verbose
        self.type = type.lower()  # convert type to lowercase

        if self.type == 'mlp':
            self.regressors = EnsembleRegressor._ensemble_nn
        elif self.type == 'mlp_large':
            self.regressors = EnsembleRegressor._ensemble_nn_large
        elif self.type == 'ridge':
            self.regressors = EnsembleRegressor._ensemble_ridge_regression
        elif self.type == 'auto_large':
            self.regressors = EnsembleRegressor._ensemble_auto_large
        else:
            self.regressors = EnsembleRegressor._ensemble_regressors_auto

        # set regressor labels
        self.regressor_labels = []
        self.regressor_count = len(self.regressors)
        for i, regr in enumerate(self.regressors):
            self.regressor_labels.append(str(regr))

    def _dprint(self, *args, **kwargs):
        """overload print() function to only print when verbose=True."""
        if self._verbose:
            return __builtin__.print(*args, **kwargs)

    def fit(self,
            X_train,
            y_train,
            samples_per_regressor=None,
            regressor_overlap=0):
        """ Fits the model for all the regression algorithms in the ensemble.
            The models themselves can be accessed directly at EnsembleRegressor.regressors,
            and their labels is accessible in EnsembleRegressor.regressor_labels.

        :param X_train: Data matrix. Shape [# samples, # features].
        :param y_train: Target value vector.
        :param samples_per_regressor: Number of samples from X_train that each regressor will be trained on.
                                      Default 'None' will cause all regressors to be trained on all samples.
        :param regressor_overlap: If samples_per_regressor is not None, this is the number of samples overlapping for
                                  every adjacent pair of regressors. Defaults to no overlap.
        """
        start_sample = 0
        if samples_per_regressor is None:
            end_sample = None
        else:
            end_sample = samples_per_regressor

        start = time.time()
        for i, regr in enumerate(self.regressors):
            self._dprint('## ' + str(i) + '. ' + str(regr))

            X = X_train[start_sample:end_sample, :]
            y = y_train[start_sample:end_sample]
            regr.fit(X, y)

            if samples_per_regressor is not None:
                start_sample = start_sample + samples_per_regressor - regressor_overlap
                end_sample = start_sample + samples_per_regressor

            if type(regr) in [
                    linear_model.LinearRegression, linear_model.Ridge,
                    LinearSVR
            ]:
                self._dprint('\tCoefficients: ',
                             ', '.join(['%.2f' % f for f in regr.coef_]))

            if hasattr(regr, 'alphas_'):
                self._dprint('\tAlphas: ',
                             ', '.join(['%.2f' % f for f in regr.alphas_]))

        self._dprint('Total running time: %.2f' % (time.time() - start))

    def predict(self, X):
        """
        :param X: Data matrix. Shape [# samples, # features].
        :return: Ensemble predictions. Shape [# regressors, # samples].
        """
        Z = np.ndarray(shape=(len(self.regressors), X.shape[0]))
        for i, regr in enumerate(self.regressors):
            # collect the i-th regressor's predictions as row i of Z
            try:
                Z[i, :] = regr.predict(X)
            except:
                print(regr)
                raise

        return Z

    def score(self, X_test, y_test, **kwargs):
        """
        :return: vector with the R^2 score for each regressor
        """
        s = np.zeros(self.regressor_count)
        for i, regr in enumerate(self.regressors):
            try:
                s[i] = regr.score(X_test, y_test)
            except:
                print(regr)
                raise
        return s

    def mean_squared_error(self, X_test, y_test):
        """
        :return: vector with the MSE for each regressor
        """
        Z = self.predict(X_test)
        return np.mean((Z - y_test[None, :])**2, 1)
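
# A minimal, hedged usage sketch for the EnsembleRegressor class above; the
# random data and the names X_train/y_train/X_test/y_test are illustrative
# placeholders, not part of the original code.
import numpy as np

X_train, y_train = np.random.rand(200, 3), np.random.rand(200)
X_test, y_test = np.random.rand(50, 3), np.random.rand(50)

ens = EnsembleRegressor(type='auto', verbose=True)
ens.fit(X_train, y_train)              # every regressor is trained on all samples
Z = ens.predict(X_test)                # shape [# regressors, # samples]
r2_per_regressor = ens.score(X_test, y_test)
mse_per_regressor = ens.mean_squared_error(X_test, y_test)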
Example #19
0
def get_regression_estimators(r, regression_models):
    if r == 'ARDRegression':
        regression_models[r] = linear_model.ARDRegression()
    elif r == 'BayesianRidge':
        regression_models[r] = linear_model.BayesianRidge()
    elif r == 'ElasticNet':
        regression_models[r] = linear_model.ElasticNet()
    elif r == 'ElasticNetCV':
        regression_models[r] = linear_model.ElasticNetCV()
    elif r == 'HuberRegressor':
        regression_models[r] = linear_model.HuberRegressor()
    elif r == 'Lars':
        regression_models[r] = linear_model.Lars()
    elif r == 'LarsCV':
        regression_models[r] = linear_model.LarsCV()
    elif r == 'Lasso':
        regression_models[r] = linear_model.Lasso()
    elif r == 'LassoCV':
        regression_models[r] = linear_model.LassoCV()
    elif r == 'LassoLars':
        regression_models[r] = linear_model.LassoLars()
    elif r == 'LassoLarsCV':
        regression_models[r] = linear_model.LassoLarsCV()
    elif r == 'LassoLarsIC':
        regression_models[r] = linear_model.LassoLarsIC()
    elif r == 'LinearRegression':
        regression_models[r] = linear_model.LinearRegression()
    elif r == 'LogisticRegression':
        regression_models[r] = linear_model.LogisticRegression()
    elif r == 'LogisticRegressionCV':
        regression_models[r] = linear_model.LogisticRegressionCV()
    elif r == 'MultiTaskElasticNet':
        regression_models[r] = linear_model.MultiTaskElasticNet()
    elif r == 'MultiTaskElasticNetCV':
        regression_models[r] = linear_model.MultiTaskElasticNetCV()
    elif r == 'MultiTaskLasso':
        regression_models[r] = linear_model.MultiTaskLasso()
    elif r == 'MultiTaskLassoCV':
        regression_models[r] = linear_model.MultiTaskLassoCV()
    elif r == 'OrthogonalMatchingPursuit':
        regression_models[r] = linear_model.OrthogonalMatchingPursuit()
    elif r == 'OrthogonalMatchingPursuitCV':
        regression_models[r] = linear_model.OrthogonalMatchingPursuitCV()
    elif r == 'PassiveAggressiveClassifier':
        regression_models[r] = linear_model.PassiveAggressiveClassifier()
    elif r == 'PassiveAggressiveRegressor':
        regression_models[r] = linear_model.PassiveAggressiveRegressor()
    elif r == 'Perceptron':
        regression_models[r] = linear_model.Perceptron()
    elif r == 'RANSACRegressor':
        regression_models[r] = linear_model.RANSACRegressor()
    elif r == 'Ridge':
        regression_models[r] = linear_model.Ridge()
    elif r == 'RidgeClassifier':
        regression_models[r] = linear_model.RidgeClassifier()
    elif r == 'RidgeClassifierCV':
        regression_models[r] = linear_model.RidgeClassifierCV()
    elif r == 'RidgeCV':
        regression_models[r] = linear_model.RidgeCV()
    elif r == 'SGDClassifier':
        regression_models[r] = linear_model.SGDClassifier()
    elif r == 'SGDRegressor':
        regression_models[r] = linear_model.SGDRegressor()
    elif r == 'TheilSenRegressor':
        regression_models[r] = linear_model.TheilSenRegressor()
    else:
        print(
            r +
            " is an unsupported regression type. Check if you have misspelled the name."
        )
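
# A hedged usage sketch for get_regression_estimators above: the function fills
# the supplied dict in place with one unfitted sklearn estimator per requested name.
regression_models = {}
for name in ('Ridge', 'Lasso', 'OrthogonalMatchingPursuit'):
    get_regression_estimators(name, regression_models)
# regression_models['OrthogonalMatchingPursuit'] is now an unfitted OMP estimator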
Example #20
0
    def generate_prediction(cls, race):
        """Generate a prediction for the specified race"""

        prediction = {
            'race_id': race['_id'],
            'earliest_date': cls.get_earliest_date(),
            'prediction_version': cls.PREDICTION_VERSION,
            'seed_version': Seed.SEED_VERSION,
            'results': None,
            'score': None,
            'train_seeds': None,
            'test_seeds': None,
            'estimator': None
        }

        predictor = None
        generate_predictor = False

        segment = tuple(race['entry_conditions']) + tuple(
            [race['track_condition']])
        with cls.predictor_cache_lock:
            if segment in cls.predictor_cache:
                predictor = cls.predictor_cache[segment]
            else:
                cls.predictor_cache[segment] = None
                generate_predictor = True

        if generate_predictor:

            similar_races = pyracing.Race.find({
                'entry_conditions':
                race['entry_conditions'],
                'track_condition':
                race['track_condition'],
                'start_time': {
                    '$lt': race.meet['date']
                }
            })
            if len(similar_races) >= (1 / cls.TEST_SIZE):

                try:

                    train_races, test_races = cross_validation.train_test_split(
                        similar_races, test_size=cls.TEST_SIZE)

                    train_X = []
                    train_y = []
                    for train_race in train_races:
                        for seed in train_race.seeds:
                            if seed['result'] is not None:
                                train_X.append(seed.normalized_data)
                                train_y.append(seed['result'])

                    test_X = []
                    test_y = []
                    for test_race in test_races:
                        for seed in test_race.seeds:
                            if seed['result'] is not None:
                                test_X.append(seed.normalized_data)
                                test_y.append(seed['result'])

                    predictor = {
                        'classifier': None,
                        'score': None,
                        'train_seeds': len(train_y),
                        'test_seeds': len(test_y),
                        'estimator': None
                    }
                    dual = len(train_X) < len(train_X[0])
                    kernel = 'linear'
                    loss = 'epsilon_insensitive'
                    if not dual:
                        loss = 'squared_epsilon_insensitive'
                    for estimator in (
                            linear_model.BayesianRidge(),
                            linear_model.ElasticNet(),
                            linear_model.LinearRegression(),
                            linear_model.LogisticRegression(),
                            linear_model.OrthogonalMatchingPursuit(),
                            linear_model.PassiveAggressiveRegressor(),
                            linear_model.Perceptron(), linear_model.Ridge(),
                            linear_model.SGDRegressor(),
                            svm.SVR(kernel=kernel),
                            svm.LinearSVR(dual=dual,
                                          loss=loss), svm.NuSVR(kernel=kernel),
                            tree.DecisionTreeRegressor(),
                            tree.ExtraTreeRegressor()):
                        logging.debug(
                            'Trying {estimator} for {segment}'.format(
                                estimator=estimator.__class__.__name__,
                                segment=segment))

                        try:
                            classifier = pipeline.Pipeline([
                                ('feature_selection',
                                 feature_selection.SelectFromModel(
                                     estimator, 'mean')),
                                ('regression', estimator)
                            ])
                            classifier.fit(train_X, train_y)
                            score = classifier.score(test_X, test_y)

                            if predictor['classifier'] is None or predictor[
                                    'score'] is None or score > predictor[
                                        'score']:
                                logging.debug(
                                    'Using {estimator} ({score}) for {segment}'
                                    .format(
                                        estimator=estimator.__class__.__name__,
                                        score=score,
                                        segment=segment))
                                predictor['classifier'] = classifier
                                predictor['score'] = score
                                predictor[
                                    'estimator'] = estimator.__class__.__name__

                        except BaseException as e:
                            logging.debug(
                                'Caught exception while trying {estimator} for {segment}: {exception}'
                                .format(estimator=estimator.__class__.__name__,
                                        segment=segment,
                                        exception=e))
                            continue

                    cls.predictor_cache[segment] = predictor

                except:

                    del cls.predictor_cache[segment]
                    raise

            else:

                del cls.predictor_cache[segment]

        else:

            while predictor is None:
                try:
                    predictor = cls.predictor_cache[segment]
                    time.sleep(10)
                except KeyError:
                    break

        if predictor is not None:

            reverse = False
            if 'score' in predictor and predictor['score'] is not None:
                reverse = predictor['score'] < 0
                prediction['score'] = abs(predictor['score'])

            if 'classifier' in predictor and predictor[
                    'classifier'] is not None:
                raw_results = {}
                for seed in race.seeds:
                    raw_result = predictor['classifier'].predict(
                        numpy.array(seed.normalized_data).reshape(1, -1))[0]
                    if raw_result is not None:
                        if raw_result not in raw_results:
                            raw_results[raw_result] = []
                        raw_results[raw_result].append(seed.runner['number'])
                for key in sorted(raw_results.keys(), reverse=reverse):
                    if prediction['results'] is None:
                        prediction['results'] = []
                    prediction['results'].append(
                        sorted([number for number in raw_results[key]]))

            if 'train_seeds' in predictor:
                prediction['train_seeds'] = predictor['train_seeds']

            if 'test_seeds' in predictor:
                prediction['test_seeds'] = predictor['test_seeds']

            if 'estimator' in predictor:
                prediction['estimator'] = predictor['estimator']

        return prediction
import pickle

# imports required by the names used below
from pandas import read_csv
from sklearn import linear_model as lm
from sklearn import svm, tree, ensemble
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold, cross_val_score
url = "https://goo.gl/sXleFv"
names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
    'PTRATIO', 'B', 'LSTAT', 'MEDV'
]
dataframe = read_csv(url, delim_whitespace=True, names=names)
array = dataframe.values
X = array[:, 0:13]
Y = array[:, 13]
kfold = KFold(n_splits=10, random_state=7)
linregression = lm.LinearRegression()
ridge = lm.Ridge()
lasso = lm.Lasso()
lars = lm.Lars()
omp = lm.OrthogonalMatchingPursuit()
br = lm.BayesianRidge()
kn = KNeighborsRegressor()
svr = svm.SVR()
dtr = tree.DecisionTreeRegressor()
rfr = ensemble.RandomForestRegressor()
gbr = ensemble.GradientBoostingRegressor()
bag = ensemble.BaggingRegressor(br)
mse = 'neg_mean_squared_error'
r2 = 'r2'
models = [
    linregression, ridge, lasso, lars, omp, br, kn, svr, dtr, rfr, gbr, bag
]
for model in models:
    mseResult = cross_val_score(model, X, Y, cv=kfold, scoring=mse)
    r2result = cross_val_score(model, X, Y, cv=kfold, scoring=r2)
    # report the cross-validated scores (sklearn returns MSE negated)
    print('%s: MSE=%.3f, R2=%.3f' %
          (model.__class__.__name__, -mseResult.mean(), r2result.mean()))
def train_test_all_regressors(X_train, X_test, y_train, y_test, seed=SEED):
    """
    Train, test and print the results of most available regressors presented in sklearn.

    Args:
        X_train (DataFrame): features of the training set
        X_test (DataFrame): features of the test set
        y_train (Series): target values of the training set
        y_test (Series): target values of the test set
        seed (int): random seed passed to the ensemble and tree regressors
    """
    assert isinstance(X_train, pd.core.frame.DataFrame)
    assert isinstance(X_test, pd.core.frame.DataFrame)
    assert isinstance(y_train, pd.core.series.Series)
    assert isinstance(y_test, pd.core.series.Series)
    assert isinstance(seed, int)

    from sklearn import linear_model
    from sklearn import tree
    from sklearn import ensemble
    from sklearn import neighbors
    from sklearn import neural_network

    models = []
    models.append(("BayesianRidge", linear_model.BayesianRidge()))
    models.append(("ElasticNet", linear_model.ElasticNet()))
    models.append(("HuberRegressor", linear_model.HuberRegressor()))
    models.append(("Lars", linear_model.Lars()))
    models.append(("Lasso", linear_model.Lasso()))
    models.append(("LassoLars", linear_model.LassoLars()))
    models.append(("LinearRegression", linear_model.LinearRegression()))
    models.append(("OrthogonalMatchingPursuit",
                   linear_model.OrthogonalMatchingPursuit()))
    models.append(("PassiveAggressiveRegressor",
                   linear_model.PassiveAggressiveRegressor()))
    models.append(("Ridge", linear_model.Ridge()))
    models.append(("SGDRegressor", linear_model.SGDRegressor()))
    models.append(
        ("AdaBoostRegressor", ensemble.AdaBoostRegressor(random_state=seed)))
    models.append(
        ("BaggingRegressor", ensemble.BaggingRegressor(random_state=seed)))
    models.append(("ExtraTreesRegressor",
                   ensemble.ExtraTreesRegressor(random_state=seed)))
    models.append(("GradientBoostingRegressor",
                   ensemble.GradientBoostingRegressor(random_state=seed)))
    models.append(("RandomForestRegressor",
                   ensemble.RandomForestRegressor(random_state=seed)))
    models.append(("DecisionTreeRegressor",
                   tree.DecisionTreeRegressor(random_state=seed)))
    models.append(("KNeighborsRegressor", neighbors.KNeighborsRegressor()))
    models.append(("MLPRegressor", neural_network.MLPRegressor()))

    best_mean_absolute_percentage_error = 100
    best_model = ''

    for name, model in models:
        print(
            '------------------------------------------------------------------------------'
        )
        print(name)
        print(
            '------------------------------------------------------------------------------'
        )

        model.fit(X_train, y_train)

        print('Training Set')
        y_pred = model.predict(X_train)
        print_results(y_train, y_pred)

        print('Testing Set')
        y_pred = model.predict(X_test)
        print_results(y_test, y_pred)

        mean_absolute_percentage_error_value = mean_absolute_percentage_error(
            y_test, y_pred)
        if mean_absolute_percentage_error_value < best_mean_absolute_percentage_error:
            best_mean_absolute_percentage_error = mean_absolute_percentage_error_value
            best_model = name

    print(
        '------------------------------------------------------------------------------'
    )
    print('Best model: ' + best_model)
    print('Best mean absolute percentage error: ' +
          str(best_mean_absolute_percentage_error))
    print(
        '------------------------------------------------------------------------------'
    )
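
# A hedged usage sketch for train_test_all_regressors above; print_results,
# mean_absolute_percentage_error and SEED are assumed to be defined elsewhere in
# the same module, and the toy DataFrame here is only a placeholder.
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.DataFrame({'x1': range(100), 'x2': range(100, 200), 'y': range(50, 150)})
X_tr, X_te, y_tr, y_te = train_test_split(df[['x1', 'x2']], df['y'],
                                          test_size=0.2, random_state=42)
train_test_all_regressors(X_tr, X_te, y_tr, y_te, seed=42)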
Example #23
0
    def __init__(self, method, yrange, params, i=0):  #TODO: yrange doesn't currently do anything. Remove or do something with it!
        self.algorithm_list = ['PLS',
                               'GP',
                               'OLS',
                               'OMP',
                               'Lasso',
                               'Elastic Net',
                               'Ridge',
                               'Bayesian Ridge',
                               'ARD',
                               'LARS',
                               'LASSO LARS',
                               'SVR',
                               'KRR',
                               ]
        self.method = method
        self.outliers = None
        self.ransac = False

        print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)

        if self.method[i] == 'LASSO':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])

            self.model = linear.Lasso(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            self.model = linear.ElasticNet(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'BRR':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lars(**params_temp)

        if self.method[i] == 'LASSO LARS':
            self.model = linear.LassoLars(**params[i])

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])

        if self.method[i] == 'GP':
            # get the method for dimensionality reduction and the number of components
            self.reduce_dim = params[i]['reduce_dim']
            self.n_components = params[i]['n_components']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove parameters not accepted by Gaussian Process
            params_temp.pop('reduce_dim')
            params_temp.pop('n_components')
            self.model = GaussianProcess(**params_temp)
Example #24
0
]
extract_operators = gen_extract_operators(ancestor_size, downsampled_size,
                                          patchsize, ancestor_shift)

image = scipy.misc.imread('./lena.png')
image = numpy.array(image) / 255.

y = gen_patch_2d(image, patchsize, data_shift)
# y = y[:, numpy.random.choice(y.shape[1], 3000)]
y_mean = numpy.mean(y, axis=0)
y = y - numpy.tile(y_mean, [y.shape[0], 1])

# declare lasso model
lasso = linear_model.Lasso(alpha=1e-3)
# omp =  linear_model.OrthogonalMatchingPursuit(tol=0.1, normalize=False)
omp = linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs=15,
                                             normalize=False)

# aal = AncestralAtomLearning(ancestor, extract_operators, omp)

# remember datetime for filename
dtstr = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

init_ancestors = []
for i in range(10):
    theta = numpy.linspace(0, 2 * numpy.pi, ancestor_size[0])
    sin_wave = numpy.sin((i + 1) * theta)
    ancestor_init = numpy.outer(sin_wave, sin_wave)
    init_ancestors.append(ancestor_init.flatten('F'))
init_ancestors = numpy.array(init_ancestors).T

for num_ancestor in range(1, 10):
Example #25
0
    def __init__(
        self,
        method,
        yrange,
        params,
        i=0
    ):  #TODO: yrange doesn't currently do anything. Remove or do something with it!
        self.algorithm_list = [
            'PLS',
            'GP',
            'OLS',
            'OMP',
            'Lasso',
            'Elastic Net',
            'Ridge',
            'Bayesian Ridge',
            'ARD',
            'LARS',
            'LASSO LARS',
            'SVR',
            'KRR',
        ]
        self.method = method
        self.outliers = None
        self.ransac = False

        print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.OrthogonalMatchingPursuit(**params_temp)
            else:
                params_temp.pop('precompute')
                self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

        if self.method[i] == 'LASSO':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # check whether to do CV or not
            try:
                self.do_cv = params[i]['CV']
                # Remove CV parameter
                params_temp.pop('CV')
            except:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.Lasso(**params_temp)
            else:
                params_temp.pop('alpha')
                self.model = linear.LassoCV(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            try:
                self.do_cv = params[i]['CV']
                params_temp.pop('CV')
            except:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.ElasticNet(**params_temp)
            else:
                params_temp['l1_ratio'] = [.1, .5, .7, .9, .95, .99, 1]
                self.model = linear.ElasticNetCV(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            try:
                # check whether to do CV or not
                self.do_cv = params[i]['CV']

                # Remove CV parameter
                params_temp.pop('CV')
            except:
                self.do_cv = False

            if self.do_cv:
                self.model = linear.RidgeCV(**params_temp)
            else:
                self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'BRR':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            try:
                # check whether to do CV or not
                self.do_cv = params[i]['CV']

                # Remove CV parameter
                params_temp.pop('CV')
            except:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.Lars(**params_temp)
            else:
                self.model = linear.LarsCV(**params_temp)

        if self.method[i] == 'LASSO LARS':
            model = params[i]['model']
            params_temp = copy.copy(params[i])
            params_temp.pop('model')

            if model == 0:
                self.model = linear.LassoLars(**params_temp)
            elif model == 1:
                self.model = linear.LassoLarsCV(**params_temp)
            elif model == 2:
                self.model = linear.LassoLarsIC(**params_temp)
            else:
                print("Something went wrong, \'model\' should be 0, 1, or 2")

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])

        if self.method[i] == 'GP':
            # get the method for dimensionality reduction and the number of components
            self.reduce_dim = params[i]['reduce_dim']
            self.n_components = params[i]['n_components']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove parameters not accepted by Gaussian Process
            params_temp.pop('reduce_dim')
            params_temp.pop('n_components')
            self.model = GaussianProcess(**params_temp)
Example #26
0
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
Fit a polynomial chaos expansion using linear regression.

Parameters
----------
P : Poly
    Polynomial chaos expansion with `P.shape=(M,)` and `P.dim=D`.
x : array_like
    Collocation nodes with `x.shape=(D,K)`.
u : array_like
    Model evaluations with `len(u)=K`.
retall : bool
    If True return uhat in addition to R
rule : str
    Regression method used.

    The following methods use scikit-learn as the backend.
    See `sklearn.linear_model` for more details.

    Key     Scikit-learn    Description
    ---     ------------    -----------
        Parameters      Description
        ----------      -----------

    "BARD"  ARDRegression   Bayesian ARD Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter
        threshold_lambda=1e-4   Upper pruning threshold

    "BR"    BayesianRidge   Bayesian Ridge Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter

    "EN"    ElastiNet       Elastic Net
        alpha=1.0       Dampening parameter
        rho             Mixing parameter in [0,1]
        max_iter=300    Maximum iterations
        tol             Optimization tolerance

    "ENC"   ElasticNetCV    EN w/Cross Validation
        rho             Dampening parameter(s)
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LA"    Lars            Least Angle Regression
        n_nonzero_coefs Number of non-zero coefficients
        eps             Cholesky regularization

    "LAC"   LarsCV          LAR w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv

    "LAS"   Lasso           Least Absolute Shrinkage and
                            Selection Operator
        alpha=1.0       Dampening parameter
        max_iter        Maximum iterations
        tol             Optimization tolerance

    "LASC"  LassoCV         LAS w/Cross Validation
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LL"    LassoLars       Lasso and Lars model
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "LLC"   LassoLarsCV     LL w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv
        eps             Cholesky regularization

    "LLIC"  LassoLarsIC     LL w/AIC or BIC
        criterion       "AIC" or "BIC" criterion
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "OMP"   OrthogonalMatchingPursuit
        n_nonzero_coefs Number of non-zero coefficients
        tol             Max residual norm (instead of non-zero coef)

    Local methods

    Key     Description
    ---     -----------
    "LS"    Ordenary Least Squares

    "T"     Ridge Regression/Tikhonov Regularization
        order           Order of regularization (or custom matrix)
        alpha           Damping parameter (else estimated from gcv)

    "TC"    T w/Cross Validation
        order           Order of regularization (or custom matrix)
        alpha           Damping parameter (else estimated from gcv)


Returns
-------
R[, uhat]

R : Poly
    Fitted polynomial with `R.shape=u.shape[1:]` and `R.dim=D`.
uhat : np.ndarray
    The Fourier coefficients in the estimation.

Examples
--------
>>> P = cp.Poly([1, x, y])
>>> s = [[-1,-1,1,1], [-1,1,-1,1]]
>>> u = [0,1,1,2]
>>> print fit_regression(P, s, u)
0.5q1+0.5q0+1.0

    """

    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = la.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)
        uhat = uhat.T

    else:

        # Scikit-learn wrapper
        try:
            _ = lm
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = lm.ARDRegression(fit_intercept=False, copy_X=False, **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNetCV(**kws)

        elif rule == "LA":  # success
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = lm.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = po.sum((P * uhat), -1)
    R = po.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q
    return R
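
# A hedged usage sketch for fit_regression with the sklearn-backed "OMP" rule.
# It mirrors the docstring example above and assumes chaospy is in scope as cp
# with x and y being chaospy polynomial variables, as in that example.
P = cp.Poly([1, x, y])
s = [[-1, -1, 1, 1], [-1, 1, -1, 1]]
u = [0, 1, 1, 2]
R_omp, uhat = fit_regression(P, s, u, rule="OMP", retall=True, n_nonzero_coefs=2)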
# Prepare ensemble regressors

regressors = (
    linear_model.LinearRegression(fit_intercept=True),
    Pipeline(
        [('poly', PolynomialFeatures(degree=2)),
         ('linear', linear_model.LinearRegression(fit_intercept=False))]
    ),
    linear_model.Ridge(alpha=.1, fit_intercept=True),
    linear_model.RidgeCV(alphas=[.01, .1, .3, .5, 1], fit_intercept=True),
    linear_model.Lasso(alpha=1, fit_intercept=True),
    linear_model.LassoCV(n_alphas=100, fit_intercept=True),
    linear_model.ElasticNet(alpha=1),
    linear_model.ElasticNetCV(n_alphas=100, l1_ratio=.5),
    linear_model.OrthogonalMatchingPursuit(),
    linear_model.BayesianRidge(),
    linear_model.ARDRegression(),
    linear_model.SGDRegressor(),
    linear_model.PassiveAggressiveRegressor(loss='squared_epsilon_insensitive'),
    linear_model.RANSACRegressor(),
    LinearSVR(max_iter=1e4, fit_intercept=True, loss='squared_epsilon_insensitive', C=0.5),
    SVR(max_iter=1e4, kernel='poly', C=1, degree=4),
    SVR(max_iter=1e4, kernel='rbf', C=1, gamma=0.1),
    SVR(kernel='linear', C=1),
    SVR(kernel='linear', C=0.5),
    SVR(kernel='linear', C=0.1),
    DecisionTreeRegressor(max_depth=5),
    DecisionTreeRegressor(max_depth=4),
    DecisionTreeRegressor(max_depth=None),
    RandomForestRegressor(n_estimators=100),
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn import linear_model

classifiers = {
    "Nearest Neighbors" : KNeighborsClassifier(3),
    "LinearRegression": linear_model.LinearRegression(),
    "Ridge": linear_model.Ridge(alpha = .5),
    "Lasso": linear_model.Lasso(alpha = 0.1),
    "ElasticNet": linear_model.ElasticNet(random_state=0),
    "Lars": linear_model.Lars(n_nonzero_coefs=1),
    "LassoLars": linear_model.LassoLars(alpha=.1),
    "Omp": linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs=1),
    "BayesianRidge":linear_model.BayesianRidge(),
    "ARDRegression":linear_model.ARDRegression(),
    "LogisitcRegression":linear_model.LogisticRegression(),
    "SGDClassifier":linear_model.SGDClassifier(),
    "Perceptron": linear_model.Perceptron(),
    "PassiveAggressiveClassifier": linear_model.PassiveAggressiveClassifier(),
    "Theil-Sen": linear_model.TheilSenRegressor(random_state=42),
    "RANSAC": linear_model.RANSACRegressor(random_state=42),
    "Huber": linear_model.HuberRegressor(),
    "SVC linear": SVC(kernel="linear", C=0.025),
    "SVC": SVC(gamma=2, C=1, probability=True),
    "GuassianProcess":GaussianProcessClassifier(1.0 * RBF(1.0)),
    "DecisionTree":DecisionTreeClassifier(max_depth=5),
    "RandomForest":RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    "NeutraNet":MLPClassifier(alpha=1),
def train_test_all_regressors_with_cross_validation(X, y, seed=SEED):
    """
    Train, test and print the results of most available regressors presented in sklearn using cross validation.
    Args:
        X (DataFrame): features of the full data set
        y (Series): target values of the full data set
        seed (int): random seed passed to the ensemble and tree regressors
    """
    assert isinstance(X, pd.core.frame.DataFrame)
    assert isinstance(y, pd.core.series.Series)
    assert isinstance(seed, int)

    from sklearn import linear_model
    from sklearn import tree
    from sklearn import ensemble
    from sklearn import neighbors
    from sklearn import neural_network

    from sklearn.model_selection import cross_val_score

    models = []
    models.append(("BayesianRidge", linear_model.BayesianRidge()))
    models.append(("ElasticNet", linear_model.ElasticNet()))
    models.append(("HuberRegressor", linear_model.HuberRegressor()))
    models.append(("Lars", linear_model.Lars()))
    models.append(("Lasso", linear_model.Lasso()))
    models.append(("LassoLars", linear_model.LassoLars()))
    models.append(("LinearRegression", linear_model.LinearRegression()))
    models.append(("OrthogonalMatchingPursuit",
                   linear_model.OrthogonalMatchingPursuit()))
    models.append(("PassiveAggressiveRegressor",
                   linear_model.PassiveAggressiveRegressor()))
    models.append(("Ridge", linear_model.Ridge()))
    models.append(("SGDRegressor", linear_model.SGDRegressor()))
    models.append(
        ("AdaBoostRegressor", ensemble.AdaBoostRegressor(random_state=seed)))
    models.append(
        ("BaggingRegressor", ensemble.BaggingRegressor(random_state=seed)))
    models.append(("ExtraTreesRegressor",
                   ensemble.ExtraTreesRegressor(random_state=seed)))
    models.append(("GradientBoostingRegressor",
                   ensemble.GradientBoostingRegressor(random_state=seed)))
    models.append(("RandomForestRegressor",
                   ensemble.RandomForestRegressor(random_state=seed)))
    models.append(("DecisionTreeRegressor",
                   tree.DecisionTreeRegressor(random_state=seed)))
    models.append(("KNeighborsRegressor", neighbors.KNeighborsRegressor()))
    models.append(("MLPRegressor", neural_network.MLPRegressor()))

    best_rmse = 1000000000.0
    best_model = ''

    for name, model in models:
        print(
            '------------------------------------------------------------------------------'
        )
        print(name)
        print(
            '------------------------------------------------------------------------------'
        )

        scores = cross_val_score(model,
                                 X,
                                 y,
                                 scoring='neg_root_mean_squared_error',
                                 cv=5)
        scores = -scores
        scores_mean = scores.mean()
        scores_std = scores.std()
        print("RMSE: %0.3f (+/- %0.2f)" % (scores_mean, scores_std * 2))

        #mean_absolute_percentage_error_value = mean_absolute_percentage_error(y_test, y_pred)
        if scores_mean < best_rmse:
            best_rmse = scores_mean
            best_model = name

    print(
        '------------------------------------------------------------------------------'
    )
    print('Best model: ' + best_model)
    print('Best RMSE: ' + str(best_rmse))
    print(
        '------------------------------------------------------------------------------'
    )
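
# A hedged usage sketch for train_test_all_regressors_with_cross_validation
# above; SEED is assumed to be defined in the module, and the toy DataFrame is
# only a placeholder for real features and targets.
import pandas as pd

df = pd.DataFrame({'x1': range(60), 'x2': range(60, 120), 'y': range(30, 90)})
train_test_all_regressors_with_cross_validation(df[['x1', 'x2']], df['y'], seed=7)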
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.datasets import make_sparse_coded_signal

if __name__ == "__main__":
    print("Generating data...")
    ncomp, nf, nncoef = 256, 10000, 32
    y, X, w = make_sparse_coded_signal(n_samples=1,
                                       n_components=ncomp,
                                       n_features=nf,
                                       n_nonzero_coefs=nncoef)
    idx, = w.nonzero()
    y = y + 0.02 * np.random.randn(len(y))
    y = y.flatten()
    X_train, X_test = X[nf // 2:], X[:nf // 2]
    y_train, y_test = y[nf // 2:], y[:nf // 2]
    print(X, y)
    print("Fitting model...")
    omp = linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs=nncoef)
    omp.fit(X_train, y_train)
    print("R2 score: {0}".format(r2_score(y_test, omp.predict(X_test))))

    plt.scatter(np.arange(nf // 2), y_train, color="purple")
    plt.scatter(np.arange(nf // 2) + (nf // 2), y_test, color="red")
    plt.plot(np.arange(nf // 2), omp.predict(X_train), color="purple")
    plt.plot(np.arange(nf // 2) + (nf // 2), omp.predict(X_test), color="red")
    plt.show()