def fix_time_estimate(events_per_developer, vectorizer, test_acc=False):
    clf_per_developer = {}
    bug_ft = {}

    acc_scores = []

    for developer, events in events_per_developer.items():
        bug_vectors = vectorizer([event[1] for event in events])
        bug_ids = [event[2] for event in events]
        fix_times = [event[0] for event in events]

        if len(fix_times) > 9:
            clf = LinearSVR(C=1000)

            x_train, x_test, y_train, y_test = train_test_split(
                bug_vectors, fix_times, test_size=0.2, random_state=42)

            clf.fit(x_train, y_train)
            score = clf.score(x_test, y_test)
            # refit on all of the developer's events for the final model
            clf.fit(bug_vectors, fix_times)
            if score > 0.0:
                acc_scores.append(score)  # keep the held-out score, not one inflated by the refit
                clf_per_developer[developer] = clf
        for index, bug_id in enumerate(bug_ids):
            bug_ft[bug_id] = fix_times[index]

    logger.info("%d out of %d developers covered by fix time estimation",
                len(acc_scores), len(events_per_developer))

    if test_acc:
        print("mean developer fix time r^2 %.2f (+/- %.2f)" %
              (np.mean(acc_scores), np.std(acc_scores)))
        import sys
        sys.exit()
    return clf_per_developer, bug_ft
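# Usage sketch (not from the original source): `vectorizer` is assumed to be any
# callable mapping a list of bug descriptions to a feature matrix, e.g. the
# transform method of a fitted TfidfVectorizer, and each event is assumed to be
# a (fix_time, description, bug_id) tuple as the indexing above implies.
#
# events = {"alice": [(3.5, "crash on startup", 101), ...], ...}
# models, bug_fix_times = fix_time_estimate(events, tfidf.transform)
# hours = models["alice"].predict(tfidf.transform(["new bug report"]))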
Example #2
def lsvm_regressor(x_trn: pd.DataFrame, y_trn: np.ndarray, x_val: pd.DataFrame,
                   y_val: np.ndarray) -> tuple:
    x_trn, x_val = x_trn.copy(), x_val.copy()
    y_trn, y_val = y_trn.copy(), y_val.copy()
    model = LinearSVR(max_iter=400, C=0.05, random_state=7)
    _ = model.fit(x_trn, y_trn)

    training_score = model.score(x_trn, y_trn)
    validation_score = model.score(x_val, y_val)

    return model, training_score, validation_score
Example #3
def scikit_lsvr_test(size):
    X, y = datasets.make_regression(n_samples=1000,
                                    n_features=size,
                                    random_state=0,
                                    noise=4.0,
                                    bias=100.0)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=42)
    model = LinearSVR(random_state=42)
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
Example #4
def finall_model(data):
    # standardize the data
    y = pd.DataFrame(data.iloc[:, -1])
    data = pd.DataFrame(data.iloc[:, :-1])
    ss = StandardScaler().fit(data.loc[range(1994, 2014), :])
    data.loc[:, :] = ss.transform(data.loc[:, :])
    x_train = data.iloc[:-2, :]
    x_test = data.iloc[-2:, :]
    y_train = y.iloc[:-2, :]
    ss = StandardScaler()
    y_train = ss.fit_transform(y_train)
    model1 = LinearRegression()
    model2 = SVR()
    model3 = LinearSVR()
    model4 = MLPRegressor(hidden_layer_sizes=(100, 2))
    model1.fit(x_train, y_train)
    model2.fit(x_train, y_train)
    model3.fit(x_train, y_train)
    model4.fit(x_train, y_train)
    print(model1.score(x_train, y_train))
    print(model2.score(x_train, y_train))
    print(model3.score(x_train, y_train))
    print(model4.score(x_train, y_train))
    y_ = model1.predict(x_test)
    # undo the target standardization by hand (equivalent to ss.inverse_transform)
    yy = np.sqrt(ss.var_) * y_ + ss.mean_

    plt.plot(y.loc[range(1994, 2014), "y"])
    plt.scatter([2014, 2015], yy, marker='*')
    plt.show()
    return data
Example #5
class SVMWrapper:
    def __init__(self,
                 c=1.0,
                 e=0.0,
                 loss="epsilon_insensitive",
                 dual=True,
                 max_iter=1000):
        self.regressor = LinearSVR(C=c,
                                   epsilon=e,
                                   loss=loss,
                                   dual=dual,
                                   max_iter=max_iter)
        self.training_time = None

    def train(self, x_train, y_train):
        start = time.perf_counter()
        self.regressor.fit(x_train, y_train)
        self.training_time = time.perf_counter() - start

    def score(self, x_test, y_test):
        return self.regressor.score(x_test, y_test)

    def predict(self, x_test):
        return self.regressor.predict(x_test)

    def predict_one(self, x_single):
        return self.regressor.predict(x_single)

    def get_training_time(self):
        if self.training_time is None:
            raise ValueError("model has not been trained yet")
        else:
            return self.training_time
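# Minimal usage sketch for SVMWrapper (synthetic data; the names below are
# illustrative and not part of the original snippet).
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=5, noise=2.0, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

wrapper = SVMWrapper(c=1.0, e=0.1, max_iter=5000)
wrapper.train(x_train, y_train)          # fits the LinearSVR and records wall-clock time
print(wrapper.score(x_test, y_test))     # R^2 on the held-out split
print(wrapper.get_training_time())       # seconds spent in fit()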
Example #6
    def train_SVM(self, data):
        train, validacion = data
        x_tr, y_tr = train
        x_val, y_val = validacion
        #print("El set de train tiene {} filas y {} columnas".format(x_tr.shape[0],x_tr.shape[1]))
        #print("El set de validacion tiene {} filas y {} columnas".format(x_val.shape[0],x_val.shape[1]))

        print('Start training LinearSVR...')
        start_time = self.timer()

        svr = LinearSVR()
        svr.fit(x_tr, y_tr)
        print("The R2 is: {}".format(svr.score(x_tr, y_tr)))
        self.timer(start_time)

        print("Making prediction on validation data")
        y_val = np.expm1(y_val)
        y_val_pred = np.expm1(svr.predict(x_val))
        mae = mean_absolute_error(y_val, y_val_pred)
        print("El mean absolute error de es {}".format(mae))

        print('Saving model into a pickle')
        os.makedirs('pickles', exist_ok=True)

        with open('pickles/svrCV.pkl', 'wb') as f:
            pickle.dump(svr, f)

        print('Making prediction and saving into a csv')
        y_test = svr.predict(self.x_test)

        return y_test
Example #7
def lin_svm(x, y, x_test, y_test):
    clf = LinearSVR(random_state=0, tol=1e-5)
    clf.fit(x, y)
    acc = clf.score(x_test, y_test)
    # print("accuracy: {} ".format(acc))

    return acc

# `coeffs_state` is assumed to be a module-level dict of the form
# {'min': [], 'max': []}; every fit() appends the extreme coefficients to it.
class LinearSVRPermuteCoef:
    def __init__(self, **kwargs):
        self.model = LinearSVR(**kwargs)

    def fit(self, X, y):
        self.model.fit(X, y)

        self.coef_ = self.model.coef_
        self.intercept_ = self.model.intercept_

        def add_coef(arr, fn):
            arr.append(fn(self.coef_))

        add_coef(coeffs_state['max'], np.max)
        add_coef(coeffs_state['min'], np.min)

        return self

    def get_params(self, deep=True):
        return self.model.get_params(deep)

    def set_params(self, **kwargs):
        self.model.set_params(**kwargs)
        return self

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y, sample_weight=None):
        if sample_weight is not None:
            return self.model.score(X, y, sample_weight)
        else:
            return self.model.score(X, y)

    @staticmethod
    def permute_min_coefs():
        return coeffs_state['min']

    @staticmethod
    def permute_max_coefs():
        return coeffs_state['max']

    @staticmethod
    def reset_perm_coefs():
        coeffs_state['min'] = []
        coeffs_state['max'] = []
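# Hedged usage sketch (not from the original source): fit against permuted
# targets and collect the extreme coefficients as a simple permutation null.
# `coeffs_state` must exist as a module-level dict before any fit() call.
import numpy as np
from sklearn.datasets import make_regression

coeffs_state = {'min': [], 'max': []}
X, y = make_regression(n_samples=120, n_features=4, noise=1.0, random_state=0)

est = LinearSVRPermuteCoef(random_state=0, max_iter=10000)
rng = np.random.default_rng(0)
for _ in range(20):
    est.fit(X, rng.permutation(y))        # each fit records max/min of coef_
print(max(LinearSVRPermuteCoef.permute_max_coefs()))
LinearSVRPermuteCoef.reset_perm_coefs()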
def svm_regressor(train_data, train_label, test_data, test_label, parameters):
    min_error = float('inf')
    error = []

    # tuned_parameters = [{'kernel': ['rbf'], 'gamma': [100,10,1,1e-1, 1e-2,],
    #                      'C': [0.1,1, 10, 100], 'epsilon':[ 100, 1000, 10000,1e6,1e8]}]
    #                     # {'kernel': ['linear'], 'C': [1, 10, 100, 1000], 'epsilon': [1, 10,100,1000]},
    #                     # {'kernel':['poly'],'gamma': [1e-3, 1e-4],
    #                     #  'C': [1, 10, 100, 1000], 'epsilon':[ 1, 10, 100,1000]}]
    # # {'kernel': ['linear'], 'C': [1, 10, 100, 1000], 'epsilon': [1e-2, 1e-1, 1, 10]}
    # clf = GridSearchCV(SVR(), tuned_parameters, cv=5,verbose=1,n_jobs=-1)
    # clf.fit(train_data, train_label)
    # print clf.best_params_
    # print clf.cv_results_
    # tuned_parameters = [{'C': [1e-2,1e-1,1, 10, 100], 'epsilon': [1, 10, 100, 1000,10000]}]
    # clf = GridSearchCV(LinearSVR(random_state=random_state), tuned_parameters, cv=5, verbose=1, n_jobs=-1)
    # clf.fit(train_data, train_label)
    # print clf.best_params_
    # print clf.cv_results_

    # regr = SVR(kernel='rbf', gamma=0.01,C=100)
    # regr.fit(train_data, train_label)
    # score = regr.score(test_data, test_label)
    # predict = regr.predict(test_data)
    # predict = map(lambda x: [x], predict)
    # predict = np.array(predict)
    # mse = MSE(np.array(predict), test_label)
    # if (mse[0] < min_error):
    #     min_error = mse[0]
    # print mse[0]
    regr = LinearSVR(C=0.001, epsilon=1, random_state=random_state)
    regr.fit(train_data, train_label)
    score = regr.score(test_data, test_label)
    predict = regr.predict(test_data)
    # reshape predictions into a column vector (replaces Python 2's lazy map)
    predict = np.array([[x] for x in predict])
    mse = MSE(predict, test_label)
    if mse[0] < min_error:
        min_error = mse[0]

    print('MSE ' + parameters + ' ' + str(mse[0]))

    df = pd.Series(predict.flatten(), index=test_label.index)
    price = pd.concat([train_label, test_label])  # Series.append was removed in pandas 2.0
    plt.title('SVM Regression on ' + parameters)
    plt.plot(price[1000:-1], label='actual price')
    plt.plot(df, label='predicted price')
    plt.legend(loc='lower right')
    plt.xlabel('Dates')
    plt.ylabel('Price')
    # plt.show()
    directory = './svm/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    plt.savefig(directory + parameters + '.png')
    plt.close()
    return
Example #11
    def keplerLinear(self, kepler_df):
        y = kepler_df['koi_score']
        X = kepler_df
        del X['koi_score']  # drop the target from the features (mutates kepler_df in place)

        # hold out a test set so the model is evaluated on unseen data
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.25)
        regr = LinearSVR(random_state=0, tol=1e-5)
        start_time = timeit.default_timer()

        regr.fit(X_train, y_train)  # fit on the training split only, not on all of X
        end_time = timeit.default_timer()
        accuracy = regr.score(X_test, y_test)
        elapsed_time = end_time - start_time
        return accuracy, elapsed_time
    def train_algs(self):
        """

        TRAIN WITHOUT CROSS VALIDATION

        """

        st.subheader("Results")
        self.chosen_models_names = []
        self.chosen_models = []

        if len(self.algorithms) == 0:
            st.warning('You should select at least one algorithm')
            return

        X = self.raw_data.drop(self.out_col, axis=1)
        y = self.raw_data[self.out_col]
        msk = np.random.rand(len(X)) < self.percent_train / 100
        X_train = X[msk]
        X_test = X[~msk]
        Y_train = y[msk]
        Y_test = y[~msk]

        for alg in self.algorithms:

            if alg == 'LinearSVR':
                from sklearn.svm import LinearSVR
                svc = LinearSVR()
                svc.fit(X_train, Y_train)
                st.write("LinearSVR score", svc.score(X_test, Y_test))

                self.chosen_models_names.append('LinearSVR')
                self.chosen_models.append(svc)

            elif alg == 'RidgeCV':
                from sklearn.linear_model import RidgeCV
                rid = RidgeCV()
                rid.fit(X_train, Y_train)
                st.write("RidgeCV score", rid.score(X_test, Y_test))

                self.chosen_models_names.append('RidgeCV')
                self.chosen_models.append(rid)

            elif alg == 'Random Forest Regressor':
                from sklearn.ensemble import RandomForestRegressor
                rfc = RandomForestRegressor()
                rfc.fit(X_train, Y_train)
                st.write("rfc score", rfc.score(X_test, Y_test))

                self.chosen_models_names.append('Random Forest Regressor')
                self.chosen_models.append(rfc)

            elif alg == 'Adaboost':
                from sklearn.ensemble import AdaBoostRegressor
                ada = AdaBoostRegressor()
                ada.fit(X_train, Y_train)
                st.write("ada score", ada.score(X_test, Y_test))

                self.chosen_models_names.append('Adaboost')
                self.chosen_models.append(ada)

            elif alg == 'XGBoost':
                import xgboost as xgb
                xgb_reg = xgb.XGBRegressor(n_estimators=300)  # avoid shadowing the xgb module
                xgb_reg.fit(X_train, Y_train, verbose=0)
                st.write("xgb score", xgb_reg.score(X_test, Y_test))

                self.chosen_models_names.append('XGBoost')
                self.chosen_models.append(xgb_reg)

        if self.meta_model_check:
            if self.meta_model_type == "voting":
                from sklearn.ensemble import VotingRegressor
                stack = VotingRegressor(estimators=list(
                    zip(self.chosen_models_names, self.chosen_models)))
                stack.fit(X_train, Y_train)
                st.write("voting score", stack.score(X_test, Y_test))

            else:
                from sklearn.ensemble import StackingRegressor

                if self.meta_model == "GradientBoostingRegressor":
                    from sklearn.ensemble import GradientBoostingRegressor
                    stack = StackingRegressor(
                        estimators=list(
                            zip(self.chosen_models_names, self.chosen_models)),
                        final_estimator=GradientBoostingRegressor())

                elif self.meta_model == "RandomForestRegressor":
                    from sklearn.ensemble import RandomForestRegressor
                    stack = StackingRegressor(
                        estimators=list(
                            zip(self.chosen_models_names, self.chosen_models)),
                        final_estimator=RandomForestRegressor())

                stack.fit(X_train, Y_train)
                st.write("stack score", stack.score(X_test, Y_test))
nR_ramp = ramp_signal(nR, 0.01)


from sklearn.svm import LinearSVR

linear_svm = LinearSVR(C=1e08,
                       fit_intercept=True,
                       dual=True,
                       epsilon=1e-6,
                       loss='squared_epsilon_insensitive',
                       max_iter=10000,
                       random_state=None,
                       tol=0.000001,
                       verbose=0).fit(surgeXC, nU)
coefLinear = linear_svm.coef_

linear_svm_ramp = LinearSVR(C=1,
                            fit_intercept=True,
                            dual=True,
                            epsilon=1e-6,
                            loss='squared_epsilon_insensitive',
                            max_iter=10000,
                            random_state=None,
                            verbose=0).fit(surgeXC, nU_ramp)
coefLinear_ramp = linear_svm_ramp.coef_
print("Train set R^2 of Surge on LinearSVR method: {:.2f}".format(
    linear_svm.score(surgeXC, nU)))

linear_svm1 = LinearSVR(C=1e08,
                        fit_intercept=True,
                        dual=True,
                        epsilon=1e-6,
                        loss='squared_epsilon_insensitive',
                        max_iter=10000,
                        random_state=None,
                        tol=0.000001,
                        verbose=0).fit(swayYC, nV)

coefLinear1 = linear_svm1.coef_
linear_svm1_ramp = LinearSVR(C=1e01,
                             fit_intercept=True,
                             dual=True,
                             epsilon=1e-4,
                             loss='squared_epsilon_insensitive',
                             max_iter=10000,
                             random_state=None,
                             tol=0.000001,
                             verbose=0).fit(swayYC, nV_ramp)

coefLinear1_ramp = linear_svm1_ramp.coef_
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

#Regression SVM
regressor = SVR(kernel = 'rbf', C = 10, gamma = 0.1)
regressor.fit(X_train, y_train)

regressor.score(X_test,y_test)


#Linear SVM

regressor_linear = LinearSVR()
regressor_linear.fit(X_train, y_train)
regressor_linear.score(X_test,y_test)


# use a grid search to find the best parameters (see page 268)
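# A sketch of that grid search (the parameter grids below are illustrative,
# not the book's): tune C/gamma for the RBF SVR and C/epsilon for LinearSVR.
param_grid_rbf = {'C': [0.1, 1, 10, 100], 'gamma': [0.01, 0.1, 1]}
grid_rbf = GridSearchCV(SVR(kernel='rbf'), param_grid_rbf, cv=5)
grid_rbf.fit(X_train, y_train)
print(grid_rbf.best_params_, grid_rbf.best_score_)

param_grid_lin = {'C': [0.1, 1, 10], 'epsilon': [0.0, 0.1, 1.0]}
grid_lin = GridSearchCV(LinearSVR(max_iter=10000), param_grid_lin, cv=5)
grid_lin.fit(X_train, y_train)
print(grid_lin.best_params_, grid_lin.best_score_)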

###################
##################
##### Question 2 b
##################
##################
from sklearn.tree import DecisionTreeRegressor

DT = DecisionTreeRegressor(max_depth=4)
DT.fit(X_train,y_train)
DT.score(X_test,y_test)
Example #15
data, nrows, ncols = readDataSet("YearPredictionMSD20.txt")
X = data[:, 1:91]
y = data[:, 0]
pca = PCA(n_components=10)
pca.fit(X)
print(pca.explained_variance_ratio_)
print(pca.components_)
# print(pca.explained_variance_)
# print(pca.mean_)
print(pca.n_components_)
# print(pca.noise_variance_)
print(pca.components_[1])
rowFeatureVector = pca.components_
# project the data onto the principal components by hand
X = np.dot(rowFeatureVector, X.transpose())
X = X.transpose()
print(len(X))
print(X)
clf = LinearSVR(C=1.0, epsilon=0, verbose=1, max_iter=1000)
clf.fit(X, y)
print(clf.predict(X))
print(y)
print(clf.score(X, y))
print(clf.get_params(deep=True))
Example #16
class AllRegressionModels:
    """
    Wrapper class around all supported regression models: LinearRegression, RandomForest, SVR, NuSVR, LinearSVR, and
    XGBRegressor.
    AllRegressionModels runs every available regression algorithm on the given dataset and outputs the coefficient of
    determination and execution time of each successful model when all_regression_models() is run.
    """
    def __init__(self, attributes=None, labels=None, test_size=0.25, verbose=False):
        """
        Initializes an AllRegressionModels object.

        The following parameters are needed to use an AllRegressionModels object:

            – attributes: a numpy array of the desired independent variables (Default is None)
            – labels: a numpy array of the desired dependent variables (Default is None)
            – test_size: the proportion of the dataset to be used for testing the model;
            the proportion of the dataset to be used for training will be the complement of test_size (Default is 0.25)
            – verbose: specifies whether or not to output any and all logging during model training (Default is False)

            Note: These are the only parameters allowed. All other parameters for each model will use their default
            values. For more granular control, please instantiate each model individually.

        The following instance data is found after running all_regression_models() successfully:

            – linear_regression: a reference to the LinearRegression model
            – random_forest: a reference to the RandomForest model
            – SVR: a reference to the SVR model
            – nu_SVR: a reference to the NuSVR model
            – linear_SVR: a reference to the LinearSVR model
            – XGB_regressor: a reference to the XGBRegressor model
        
        After running all_regression_models(), the coefficient of determination and execution time for each model that
        ran successfully will be displayed in tabular form. Any models that failed to run will be listed.
        """
        self.attributes = attributes
        self.labels = labels
        self.test_size = test_size
        self.verbose = verbose

        self.linear_regression = LinearRegression()
        self.random_forest = RandomForestRegressor(verbose=self.verbose)
        self.SVR = SVR(verbose=self.verbose)
        self.nu_SVR = NuSVR(verbose=self.verbose)
        self.linear_SVR = LinearSVR(verbose=self.verbose)
        self.XGB_regressor = XGBRegressor(verbosity=int(self.verbose))

        self._regression_models = {"Model": ["R2 Score", "Time"]}
        self._failures = []

    # Accessor methods

    def get_attributes(self):
        """
        Accessor method for attributes.

        If an AllRegressionModels object is initialized without specifying attributes, attributes will be None.
        all_regression_models() cannot be called until attributes is a populated numpy array of independent variables;
        call set_attributes(new_attributes) to fix this.
        """
        return self.attributes

    def get_labels(self):
        """
        Accessor method for labels.

        If an AllRegressionModels object is initialized without specifying labels, labels will be None.
        all_regression_models() cannot be called until labels is a populated numpy array of dependent variables;
        call set_labels(new_labels) to fix this.
        """
        return self.labels

    def get_test_size(self):
        """
        Accessor method for test_size.

        Should return a number or None.
        """
        return self.test_size

    def get_verbose(self):
        """
        Accessor method for verbose.

        Will default to False if not set by the user.
        """
        return self.verbose

    def get_all_regression_models(self):
        """
        Accessor method that returns a list of all models.

        All models within the list will be None if all_regression_models() hasn't been called, yet.
        """
        return [self.linear_regression, self.random_forest, self.SVR, self.nu_SVR, self.linear_SVR, self.XGB_regressor]

    def get_linear_regression(self):
        """
        Accessor method for linear_regression.

        Will return None if all_regression_models() hasn't been called, yet.
        """
        return self.linear_regression

    def get_random_forest(self):
        """
        Accessor method for random_forest.

        Will return None if all_regression_models() hasn't been called, yet.
        """
        return self.random_forest

    def get_SVR(self):
        """
        Accessor method for SVR.

        Will return None if all_regression_models() hasn't been called, yet.
        """
        return self.SVR

    def get_nu_SVR(self):
        """
        Accessor method for nu_SVR.

        Will return None if all_regression_models() hasn't been called, yet.
        """
        return self.nu_SVR

    def get_linear_SVR(self):
        """
        Accessor method for linear_SVR.

        Will return None if all_regression_models() hasn't been called, yet.
        """
        return self.linear_SVR

    def get_XGB_regressor(self):
        """
        Accessor method for XGB_regressor.

        Will return None if all_regression_models() hasn't been called, yet.
        """
        return self.XGB_regressor

    # Modifier methods

    def set_attributes(self, new_attributes=None):
        """
        Modifier method for attributes.

        Input should be a numpy array of independent variables. Defaults to None.
        """
        self.attributes = new_attributes

    def set_labels(self, new_labels=None):
        """
        Modifier method for labels.

        Input should be a numpy array of dependent variables. Defaults to None.
        """
        self.labels = new_labels

    def set_test_size(self, new_test_size=0.25):
        """
        Modifier method for test_size.

        Input should be a number or None. Defaults to 0.25.
        """
        self.test_size = new_test_size

    def set_verbose(self, new_verbose=False):
        """
        Modifier method for verbose.

        Input should be a truthy/falsy value. Defaults to False.
        """
        self.verbose = new_verbose

    # Regression functionality

    def all_regression_models(self):
        """
        Driver method for running all regression models with given attributes and labels.
        all_regression_models() first trains the models and determines their coefficients of determination and
        execution time via _all_regression_models_runner(). Then, all_regression_models() calls _print_results() to
        format and print each successful model's measurements, while also listing any failed models.

        If verbose is True, all verbose logging for each model will be enabled.
        If verbose is False, all logging to stdout and stderr will be suppressed.
        """

        # Call helper method for running all regression models; suppress output, if needed
        if not self.verbose:
            suppress_output = io.StringIO()
            with redirect_stderr(suppress_output), redirect_stdout(suppress_output):
                self._all_regression_models_runner()
        else:
            self._all_regression_models_runner()
        
        # Print results
        self._print_results()
        
    # Helper methods

    def _all_regression_models_runner(self):
        """
        Helper method that runs all models using the given dataset and all default parameters.
        After running all models, each model is determined to be either a success or failure, and relevant data
        (R2 score, execution time) is recorded.

        _all_regression_models_runner() may only be called by all_regression_models().
        """

        # Split dataset
        dataset_X_train, dataset_X_test, dataset_y_train, dataset_y_test =\
            train_test_split(self.attributes, self.labels, test_size=self.test_size)

        # Run and time all models; identify each as success or failure
        try:
            start_time = time.time()
            self.linear_regression.fit(dataset_X_train, dataset_y_train)
            end_time = time.time()
            self._regression_models["LinearRegression"] =\
                [self.linear_regression.score(dataset_X_test, dataset_y_test), end_time - start_time]
        except:
            self._failures.append("LinearRegression")

        try:
            start_time = time.time()
            self.random_forest.fit(dataset_X_train, dataset_y_train)
            end_time = time.time()
            self._regression_models["RandomForest"] =\
                [self.random_forest.score(dataset_X_test, dataset_y_test), end_time - start_time]
        except:
            self._failures.append("RandomForest")

        try:        
            start_time = time.time()
            self.SVR.fit(dataset_X_train, dataset_y_train)
            end_time = time.time()
            self._regression_models["SVR"] = [self.SVR.score(dataset_X_test, dataset_y_test), end_time - start_time]
        except:
            self._failures.append("SVR")
        
        try:
            start_time = time.time()
            self.nu_SVR.fit(dataset_X_train, dataset_y_train)
            end_time = time.time()
            self._regression_models["NuSVR"] = [self.nu_SVR.score(dataset_X_test, dataset_y_test), end_time - start_time]
        except:
            self._failures.append("NuSVR")

        try:
            start_time = time.time()
            self.linear_SVR.fit(dataset_X_train, dataset_y_train)
            end_time = time.time()
            self._regression_models["LinearSVR"] =\
                [self.linear_SVR.score(dataset_X_test, dataset_y_test), end_time - start_time]
        except:
            self._failures.append("LinearSVR")

        try:
            start_time = time.time()
            self.XGB_regressor.fit(dataset_X_train, dataset_y_train)
            end_time = time.time()
            self._regression_models["XGBRegressor"] =\
                [self.XGB_regressor.score(dataset_X_test, dataset_y_test), end_time - start_time]
        except:
            self._failures.append("XGBRegressor")
        
    def _print_results(self):
        """
        Helper method that prints results of _all_regression_models_runner() in tabular form.

        _print_results() may only be called by all_regression_models() after all models have attempted to run.
        """

        # Print models that didn't fail
        print("\nResults:\n")

        for model, data in self._regression_models.items():
            print("{:<20} {:<20} {:<20}".format(model, data[0], data[1]))

        print()

        # Print failures, if any
        if len(self._failures) > 0:
            print("The following models failed to run:\n")

            for entry in self._failures:
                print(entry)
        
        print()
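# Hedged usage sketch for AllRegressionModels (synthetic data; not part of the
# original listing).
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=500, n_features=8, noise=3.0, random_state=0)
models = AllRegressionModels(attributes=X, labels=y, test_size=0.25, verbose=False)
models.all_regression_models()         # trains all six models and prints the R2/time table
fitted_lsvr = models.get_linear_SVR()  # individual fitted models are available afterwards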
Example #17
                          predictiveAttributeNotDegree[i][18]])
        test_result_tot.append([predictiveAttributeDegree[i][2]])
train_percent = (len(predictiveAttributeNotDegree)/100)*80
count = 0
for i in range(len(predictiveAttributeNotDegree)):
    if count < train_percent:
        count = count + 1
        train_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1], predictiveAttributeNotDegree[i][6],
                          predictiveAttributeNotDegree[i][7], predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10],
                          predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],predictiveAttributeNotDegree[i][17],
                          predictiveAttributeNotDegree[i][18]])
        train_result_tot.append([predictiveAttributeNotDegree[i][2]])
    else:
        test_set_tot.append([predictiveAttributeNotDegree[i][0], predictiveAttributeNotDegree[i][1], predictiveAttributeNotDegree[i][6],
                          predictiveAttributeNotDegree[i][7], predictiveAttributeNotDegree[i][9], predictiveAttributeNotDegree[i][10],
                          predictiveAttributeNotDegree[i][11], predictiveAttributeNotDegree[i][12],predictiveAttributeNotDegree[i][17],
                          predictiveAttributeNotDegree[i][18]])
        test_result_tot.append([predictiveAttributeNotDegree[i][2]])

train_result_tot = np.array(train_result_tot)
svm_reg_tot.fit(train_set_tot, train_result_tot.ravel())

print("----ALL ATTRIBUTE: score: ", svm_reg_tot.score(test_set_tot, test_result_tot))
#              0. matr 1.cf  6.tipoCds  7.coorte  9.annodiploma 10.votodip 11.codschool 12.tipoMat  17.mot_sta 18.sta
newStudent = [[2933, 2928, 1, 2015, 2015, 100, 200, 9, 3, 10]]
real_value = [30]
predicted = svm_reg_tot.predict(newStudent)

print("----ALL ATTRIBUTE: Predicted: ", predicted)
print("----ALL ATTRIBUTE: MSE: ", mean_squared_error(real_value, svm_reg_tot.predict(newStudent)))
print("----ALL ATTRIBUTE: Params: ", svm_reg_tot.get_params())
Example #18
svr_rbf = SVR(kernel='rbf')  # RBF (Gaussian) kernel
svr_poly = SVR(kernel='poly', degree=2, C=1e3)  # polynomial kernel; degree=2 sets the polynomial order
svr_line = SVR(kernel='linear', C=1e3)  # linear kernel; C is the penalty parameter (default 1.0)
svr_L = LinearSVR(C=1e3)
svr_rbf.fit(X, Y)
svr_poly.fit(X, Y)
svr_line.fit(X, Y)
svr_L.fit(X, Y)
result_rbf = svr_rbf.predict(X)
result_poly = svr_poly.predict(X)
result_line = svr_line.predict(X)
result_L = svr_L.predict(X)
plt.plot(np.arange(len(result_rbf)), Y, 'b.')
plt.plot(np.arange(len(result_rbf)), result_rbf, 'k-', label='rbf')
plt.plot(np.arange(len(result_rbf)), result_poly, 'r-', label='poly')
plt.plot(np.arange(len(result_rbf)), result_line, 'y-', label='linear')
plt.plot(np.arange(len(result_rbf)), result_L, 'go-', label='LinearSVR')
plt.legend()
plt.show()
print('rbf_score:', svr_rbf.score(X, Y))
print('poly_score:', svr_poly.score(X, Y))
print('linear_score:', svr_line.score(X, Y))
print('LinearSVR:', svr_L.score(X, Y))

# Summary
# It is generally recommended to normalize the data before training; the test set must be normalized as well.
# When there are very many features, or far fewer samples than features, a linear kernel already works well, and only the penalty parameter C needs tuning.
# When choosing a kernel, if a linear fit is poor, the default Gaussian kernel 'rbf' is generally recommended. The penalty parameter C and the kernel parameter gamma then need careful tuning, with several rounds of cross-validation to pick suitable values.
# In theory the Gaussian kernel is never worse than the linear kernel, but that rests on spending much more time on tuning, so in practice prefer the linear kernel whenever it solves the problem.
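# A sketch of the normalization advice above (synthetic data; names are
# illustrative, not from the original example): scaling inside a Pipeline
# ensures the test set is transformed with statistics learned from the
# training set only.
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR

X, y = make_regression(n_samples=300, n_features=20, noise=5.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
pipe = make_pipeline(StandardScaler(), LinearSVR(C=1.0, max_iter=10000))
pipe.fit(X_train, y_train)  # scaler statistics come from the training set only
print('scaled LinearSVR R^2:', pipe.score(X_test, y_test))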
                 verbose=0,
                 random_state=None,
                 max_iter=1000)

# fit the model
regr.fit(X_train, y_train)

# get the prediction
prediction_svm_p = regr.predict(X_test)

# revert the prediction value
prediction_svm_p_ori = prediction_svm_p * (y.max() - y.min()) + y.min()
y_test_ori = np.array(y_test * (y.max() - y.min()) + y.min())

# get the score for this model
score = regr.score(X_test, y_test)
# calculate the mse value for the prediction.
mse_svm_p = np.mean((prediction_svm_p_ori - y_test_ori)**2)
print("MSE with penalized SVM:", mse_svm_p)
# plot the figure to see the difference between prediction and y_test.
plt.plot(y_test_ori, label='y_test_ori')
plt.plot(prediction_svm_p_ori, label='prediction_ori')
plt.title('Comparison between y_test and prediction with SVM (L2 penalty)')
plt.ylabel('CRIM')
plt.legend()
plt.show()

########## Apply in MLP

# start to build MLP
model = Sequential()
Example #20
# use separate scalers so the feature and target statistics don't overwrite each other
ss_x = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

ss_y = StandardScaler()
y_train = ss_y.fit_transform(y_train)
y_test = ss_y.transform(y_test)

svr = SVR(kernel="linear")
svr.fit(x_train, y_train)
svr.score(x_test, y_test)
y_predict = svr.predict(x_test)

print(
    mean_squared_error(ss_y.inverse_transform(y_test),
                       ss_y.inverse_transform(y_predict)))
print(
    mean_absolute_error(ss_y.inverse_transform(y_test),
                        ss_y.inverse_transform(y_predict)))

lsvr = LinearSVR()
lsvr.fit(x_train, y_train)
lsvr.score(x_test, y_test)
y_predict1 = lsvr.predict(x_test)

print(
    mean_squared_error(ss_y.inverse_transform(y_test),
                       ss_y.inverse_transform(y_predict1)))
print(
    mean_absolute_error(ss_y.inverse_transform(y_test),
                        ss_y.inverse_transform(y_predict1)))
Example #21
    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]

    ## Train Classifiers on fold
    rdg_clf = Ridge(alpha=0.5)
    rdg_clf.fit(X1, Y1)
    lso_clf = Lasso(alpha=0.6257)
    lso_clf.fit(X1, Y1)
    svr_clf = LinearSVR(C=1e3)
    svr_clf.fit(X1, Y1)

    ## Score Classifiers on fold
    rdg_clf_score = rdg_clf.score(X2, Y2)
    lso_clf_score = lso_clf.score(X2, Y2)
    svr_clf_score = svr_clf.score(X2, Y2)

    print "Ridge:  ", rdg_clf_score
    print "Lasso:  ", lso_clf_score
    print "SVR_RBF:  ", svr_clf_score


## Train final Classifiers
# clf = Ridge(alpha=.5)
clf = LinearSVR(C=1e3)  # LinearSVR takes no gamma parameter (that belongs to kernelized SVR)
clf.fit(X_train_reduced, Y_train_raw)
Y_predicted = clf.predict(X_test_reduced)

## Save results to csv
np.savetxt("prediction.csv", Y_predicted, fmt="%.5f", delimiter=",")
    from sklearn.neighbors import KNeighborsRegressor
    knreg = KNeighborsRegressor(n_neighbors=5)
    knreg.fit(X_train, y_train)
    score_list.append(knreg.score(X_test, y_test))

    ##  Support Vector Regressor
    from sklearn.svm import SVR
    svm_reg = SVR(kernel='poly', gamma='auto', degree=2, C=5, epsilon=0.1)
    svm_reg.fit(X_train, y_train)
    score_list.append(svm_reg.score(X_test, y_test))

    ## linearSVR
    from sklearn.svm import LinearSVR
    sv_reg = LinearSVR(max_iter=1000)
    sv_reg.fit(X_train, y_train)
    score_list.append(sv_reg.score(X_test, y_test))

    ## random forest
    from sklearn.ensemble import RandomForestRegressor
    rf_reg = RandomForestRegressor(max_depth=5)
    rf_reg.fit(X_train, y_train)
    score_list.append(rf_reg.score(X_test, y_test))
    '''
    ## LightGBM
    import lightgbm as lgb
    lgb_reg=lgb.LGBMRegressor(objective='regression')
    lgb_reg.fit(X_train, y_train)
    score_list.append(lgb_reg.score(X_test, y_test))
    '''
    '''
    ### XGBoost
Example #23
df = df.iloc[:2949, :]
import pickle
df.to_pickle("Final_Data")
df = pd.read_pickle("Final_Data")  # read_pickle is a top-level pandas function

for idx, row in output_df.iterrows():
    df.loc[row['FIPS'], 'annual_count_avg'] = row['Average Annual Count']

X = df.loc[:, :'WATR']
y = df['annual_count_avg']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

from sklearn.svm import LinearSVR
svr = LinearSVR(random_state=0, tol=1e-5).fit(X_train, y_train)
svr.score(X_test, y_test)

from sklearn import svm
svm = svm.SVR().fit(X_train, y_train)
svm.score(X_test, y_test)

from sklearn.svm import NuSVR
nuSVR = NuSVR().fit(X_train, y_train)
nuSVR.score(X_test, y_test)

from sklearn import linear_model
ridge = linear_model.Ridge(alpha=0.5).fit(X_train, y_train)
ridge.score(X_test, y_test)
np.argmax(ridge.coef_)
Example #24
if __name__ == '__main__':
    np.random.seed(1)

    m = 100  # number of data samples
    X = 2 * np.random.rand(m, 1)  # uniform random values in [0, 2), shape (100, 1)
    y = (4 + 3 * X + np.random.randn(m, 1)).ravel()  # flattened to a (100,) 1-D array
    # plot_data(X, y)
    # plt.show()

    reg1 = LinearSVR(random_state=1)
    reg1.fit(X, y)
    print(reg1.intercept_, reg1.coef_)
    y_pred = reg1.predict(X)
    reg1_mse = mean_squared_error(y_true=y, y_pred=y_pred)
    reg1_rmse = np.sqrt(reg1_mse)
    reg1_r2 = reg1.score(X, y)  # R2 score
    print(reg1_rmse, reg1_r2)

    # plot the data and the fitted regression lines
    axes = [0, 2]
    plot_data(X, y)
    plot_svm_regression(reg1, axes, label='LinearSVR(e=0)')

    for e in [0.5, 1.0, 1.5]:
        reg2 = LinearSVR(random_state=1, epsilon=e)
        reg2.fit(X, y)
        plot_svm_regression(reg2, axes, label=f'LinearSVR(e={e})')

    reg3 = SVR(kernel='linear')
    reg3.fit(X, y)
    plot_svm_regression(reg3, axes, label='SVR(e=0.1)')
Example #25
        train_set.append([
            predictiveAttributeNotDegree[i][11],
            predictiveAttributeNotDegree[i][13]
        ])
        train_result.append([predictiveAttributeNotDegree[i][2]])
    else:
        test_set.append([
            predictiveAttributeNotDegree[i][11],
            predictiveAttributeNotDegree[i][13]
        ])
        test_result.append([predictiveAttributeNotDegree[i][2]])

svm_reg = LinearSVR(epsilon=1.0, max_iter=10000000)
train_result = np.array(train_result)
svm_reg.fit(train_set, train_result.ravel())
print(svm_reg.score(test_set, test_result))
# predict the whole test set in one vectorized call instead of row by row
pred = svm_reg.predict(test_set)

print(("MSE: {}".format(mean_squared_error(pred, test_result))))
print("Params: ", svm_reg.get_params())
test_set = np.array(test_set)
test_result = np.array(test_result)
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
Example #26
class AutoSklearnRegression():
    def __init__(self,
                 appID,
                 models=None,
                 evaluation_parameters=None,
                 dimensionality_reduction=None):
        if models is None:
            models = [
                'LinearRegression', 'LinearSVR', 'GradientBoostingRegressor'
            ]
        if evaluation_parameters is None:
            evaluation_parameters = [
                'ExplainedVariance', 'MAE', 'MSE', 'R2Score'
            ]
        if dimensionality_reduction is None:
            dimensionality_reduction = ['ExtraTreesClassifier']
        self.models = models
        self.dimensionality_reduction = dimensionality_reduction
        self.evaluation_parameters = evaluation_parameters
        self.models_to_use = [True, True, True]
        self.params_to_use = [True, True, True, True]
        self.set_training_parameters()
        from sklearn.linear_model import LinearRegression
        self.lr_estimator = LinearRegression()
        from sklearn.svm import LinearSVR
        self.svr_estimator = LinearSVR()
        from sklearn.ensemble import GradientBoostingRegressor
        self.gbr_estimator = GradientBoostingRegressor()
        self.appID = str(appID) + '.html'

    def set_training_parameters(self):
        if 'LinearRegression' not in self.models:
            self.models_to_use[0] = False
        if 'LinearSVR' not in self.models:
            self.models_to_use[1] = False
        if 'GradientBoostingRegressor' not in self.models:
            self.models_to_use[2] = False
        if 'ExplainedVariance' not in self.evaluation_parameters:
            self.params_to_use[0] = False
        if 'MAE' not in self.evaluation_parameters:
            self.params_to_use[1] = False
        if 'MSE' not in self.evaluation_parameters:
            self.params_to_use[2] = False
        if 'R2Score' not in self.evaluation_parameters:
            self.params_to_use[3] = False

    def train(self, data, test_size, response_col_name):
        self.test_size = test_size
        self.data = data
        self.original_data = data
        self.response = response_col_name

        self.data = self.data.dropna(axis=1, how='all')

        # Imputing nan
        import numpy as np
        self.data = self.data.replace([np.inf, -np.inf], np.nan)
        self.missing_value_columns = self.data.columns[
            self.data.isnull().any()]
        for col in self.missing_value_columns:
            if (len(self.data[col].value_counts()) < 5):
                self.data[col].fillna(self.data[col].mode()[0], inplace=True)
            else:
                self.data[col].fillna(self.data[col].mean(), inplace=True)

        # Encoding data
        from automl.sklearn.preprocessing.DummyEncode import DummyEncode
        self.data = DummyEncode(self.data).encode()

        # Separate the target column from the feature data
        self.Y = self.data[self.response]
        self.X = self.data.drop(self.response, axis=1)

        # Dimensionality Reduction
        self.X = self.select_dimensions()

        # Splitting into train and test data sets
        from automl.sklearn.preprocessing.Split import Split
        self.x_train, self.x_test, self.y_train, self.y_test = Split(
        ).train_test_split(self.X, self.Y, test_size=test_size, random_state=0)

        # Training all selected models with train data
        self.headers = ['Evaluation Parameters']
        generated_models = []
        if self.models_to_use[0]:
            self.lr_model = self.lr_estimator.fit(self.x_train, self.y_train)
            self.headers.append('LinearRegression')
            generated_models.append(self.lr_model)
        if self.models_to_use[1]:
            self.svr_model = self.svr_estimator.fit(self.x_train, self.y_train)
            self.headers.append('LinearSVR')
            generated_models.append(self.svr_model)
        if self.models_to_use[2]:
            self.gbr_model = self.gbr_estimator.fit(self.x_train, self.y_train)
            self.headers.append('GradientBoostingRegressor')
            generated_models.append(self.gbr_model)

        # Predicting on test data with all selected models
        self.predict_all()

        # Print data
        print_data_str = self.print_data()

        # Generating summary of prediction
        print_summary_str = self.summary()

        # Selecting best model on the basis of R2_score
        self.best_model()

        # Returning all selected trained models
        # return generated_models

        # printing output in a html file and storing it in string variable
        with open(self.appID, 'w') as f:
            print(print_data_str,
                  print_summary_str,
                  self.best_model_str,
                  file=f)
        with open(self.appID, 'r') as myfile:
            str_output = myfile.read()

        return str_output

    def select_dimensions(self):
        if self.dimensionality_reduction[0] == 'LinearSVC':
            from automl.sklearn.preprocessing.LinearSVC import LinearSVC
            self.reducer = LinearSVC(self.X, self.Y)
            self.method_used = 'LinearSVC'
        if self.dimensionality_reduction[0] == 'ExtraTreesClassifier':
            from automl.sklearn.preprocessing.ExtraTreesClassifier import ExtraTreesClassifier
            self.reducer = ExtraTreesClassifier(self.X, self.Y)
            self.method_used = 'ExtraTreesClassifier'
        if self.dimensionality_reduction[0] == 'LogisticRegression':
            from automl.sklearn.preprocessing.LogisticRegression import LogisticRegression
            self.reducer = LogisticRegression(self.X, self.Y)
            self.method_used = 'LogisticRegression'
        if self.dimensionality_reduction[0] == 'LassoRegression':
            from automl.sklearn.preprocessing.LassoRegression import LassoRegression
            self.reducer = LassoRegression(self.X, self.Y)
            self.method_used = 'LassoRegression'
        return self.reducer.selectFeatures()

    def predict_all(self):
        self.y_pred_all = []
        if self.models_to_use[0]:
            self.y_predict__lr = self.lr_estimator.predict(self.x_test)
            self.y_pred_all.append(self.y_predict__lr)
        if self.models_to_use[1]:
            self.y_predict__svr = self.svr_estimator.predict(self.x_test)
            self.y_pred_all.append(self.y_predict__svr)
        if self.models_to_use[2]:
            self.y_predict__gbr = self.gbr_estimator.predict(self.x_test)
            self.y_pred_all.append(self.y_predict__gbr)
        # return self.y_predict__lr, self.y_predict__svr, self.y_predict__gbr

    def predict(self, data):
        # Imputing nan
        missing_value_columns = data.columns[data.isnull().any()]
        for col in missing_value_columns:
            if (len(data[col].value_counts()) < 10):
                data[col].fillna(data[col].mode()[0], inplace=True)
            else:
                data[col].fillna(data[col].mean(), inplace=True)

        # Encoding data
        from automl.sklearn.preprocessing.DummyEncode import DummyEncode
        data = DummyEncode(data).encode()

        if self.response in data.columns:
            data_y = data[self.response]
            data_x = data.drop(self.response, axis=1)
        else:
            data_y = None
            data_x = data
        if self.best_fit_model == self.lr_estimator:
            self.prediction = self.lr_estimator.predict(data_x)
        elif self.best_fit_model == self.svr_estimator:
            self.prediction = self.svr_estimator.predict(data_x)
        else:
            self.prediction = self.gbr_estimator.predict(data_x)
        return self.prediction

    def score(self, x_train, y_train):
        return self.lr_estimator.score(x_train,
                                       y_train), self.svr_estimator.score(
                                           x_train,
                                           y_train), self.gbr_estimator.score(
                                               x_train, y_train)

    def summary(self):
        from tabulate import tabulate
        from sklearn.metrics import explained_variance_score
        from sklearn.metrics import mean_absolute_error
        from sklearn.metrics import mean_squared_error
        from sklearn.metrics import r2_score
        evaluation_table = []
        if self.params_to_use[0]:
            evaluation_table.append(['Explained Variance Score'])
        if self.params_to_use[1]:
            evaluation_table.append(['Mean Absolute Error'])
        if self.params_to_use[2]:
            evaluation_table.append(['Mean Squared Error'])
        if self.params_to_use[3]:
            evaluation_table.append(['R2 Score'])
        for y_pred in self.y_pred_all:
            i = 0
            if self.params_to_use[0]:
                evaluation_table[i].append(
                    explained_variance_score(self.y_test, y_pred))
                i = i + 1
            if self.params_to_use[1]:
                evaluation_table[i].append(
                    mean_absolute_error(self.y_test, y_pred))
                i = i + 1
            if self.params_to_use[2]:
                evaluation_table[i].append(
                    mean_squared_error(self.y_test, y_pred))
                i = i + 1
            if self.params_to_use[3]:
                evaluation_table[i].append(r2_score(self.y_test, y_pred))
        summary_str = '<p><b>Accuracy Metric:</b></p><div style="overflow-x:auto;">'\
                      + tabulate(evaluation_table, headers=self.headers ,tablefmt="html")+ '</div>'
        return summary_str

    def print_data(self):
        tb_data = self.original_data.head(n=5)
        tb_train = self.x_train.head(n=5)
        tb_test = self.x_test.head(n=5)
        tb_columns1 = list(self.data.columns)
        tb_columns2 = list(self.x_train.columns)
        style_html = '<style>table {border-collapse: collapse;width: 80%;}th, td {padding: 8px;text-align: left;border: 1px solid #ddd; font-size: 12px;}tr:hover {background-color:#f5f5f5;}th {background-color: #ec1a3d;color: white;}</style>'
        from tabulate import tabulate
        info_tables = '<div><p><b>Data Dimensions: </b>' + str(self.data.shape[0]) \
                      + ' Rows and ' + str(self.data.shape[1]) + ' Features</p><p><b>Prediction Variable: </b>' \
                      + self.response + '</p>' + '<p><b>Available Features: </b>' + str(tb_columns1) + '</p>' \
                      + '<p><b>Columns where missing values were found and replaced: </b>' \
                      + str(self.missing_value_columns) + '</p>' +'<p><b>Method Used for Dimensionality Reduction:</b> '+ self.method_used +'</p><p><b>Selected Best Features: </b>' \
                      + str(tb_columns2) + '</p>' + '<p><b>Complete Dataset: </b>' + str(self.data.shape[0]) \
                      + ' Rows and ' + str(self.data.shape[1]) + ' Features</p>' + '<p><b>Target Feature: </b>' \
                      + self.response + '</p>' + '<div style="overflow-x:auto;">' \
                      + tabulate(tb_data, headers=tb_columns1, tablefmt="html") + '</div>' \
                      + '<p><b>Splitting:</b> [############] 100%</p>' + '<p><b>Training Dataset: </b>' \
                      + str(self.x_train.shape[0]) + ' Rows and ' + str(self.x_train.shape[1]) + ' Features</p>' \
                      + '<div style="overflow-x:auto;">' + tabulate(tb_train, headers=tb_columns2, tablefmt="html") \
                      + '</div>' + '<p><b>Testing Dataset: </b>' + str(self.x_test.shape[0]) + ' Rows and ' \
                      + str(self.x_test.shape[1]) + ' Features</p>' + '<div style="overflow-x:auto;">' \
                      + tabulate(tb_test, headers=tb_columns2, tablefmt="html") + '</div>' \
                      + '<p><b>Training Models:</b> [#############################] 100%</p>'
        return style_html + info_tables

    def best_model(self):
        acc_lr, acc_svr, acc_gbr = -10000, -10000, -10000
        from sklearn.metrics import r2_score
        if self.models_to_use[0]:
            acc_lr = r2_score(self.y_test, self.y_predict__lr)
        if self.models_to_use[1]:
            acc_svr = r2_score(self.y_test, self.y_predict__svr)
        if self.models_to_use[2]:
            acc_gbr = r2_score(self.y_test, self.y_predict__gbr)
        best_acc = max(acc_lr, acc_svr, acc_gbr)
        if best_acc == acc_lr:
            model_name = 'Linear Regression'
            self.best_fit_model = self.lr_estimator
        elif best_acc == acc_svr:
            model_name = 'Support Vector Regression'
            self.best_fit_model = self.svr_estimator
        else:
            model_name = 'Gradient Boosting Regression'
            self.best_fit_model = self.gbr_estimator
        self.best_model_str = '<p><b>Evaluating Best Model:</b> [##########] Done</p><p><b>Best Model:</b> ' \
                              + model_name + '</p><p><b>R2_Score:</b> ' + str(best_acc) + '</p></div>'
        return self.best_fit_model
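# Hedged usage sketch for AutoSklearnRegression (assumes a pandas DataFrame
# `df` with a numeric target column named 'price'; both are illustrative,
# not from the original source):
# auto = AutoSklearnRegression(appID=42)   # defaults to all three models
# report_html = auto.train(df, test_size=0.25, response_col_name='price')
# predictions = auto.predict(df.drop('price', axis=1))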
def load_data():
    # load the Boston housing data (load_boston was removed in scikit-learn 1.2,
    # so this snippet assumes an older sklearn)
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # shuffle
    shuffle_indexes = np.random.permutation(len(X))
    X, y = X[shuffle_indexes], y[shuffle_indexes]
    return X, y


if __name__ == '__main__':
    X, y = load_data()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state)

    standard_scaler = StandardScaler()
    standard_scaler.fit(X_train)
    X_train_standard = standard_scaler.transform(X_train)
    X_test_standard = standard_scaler.transform(X_test)

    linear_svr = LinearSVR(C=1.0)
    linear_svr.fit(X_train_standard, y_train)

    score = linear_svr.score(X_test_standard, y_test)
    print(score)  # 0.8164411717195368

    pass
Example #28
from sklearn.svm import LinearSVR, LinearSVC
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in 0.20
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
# because of the earlier lessons, these examples skip normalization and result analysis

# compare LinearSVR with LinearRegression on the same dataset

data = pd.read_csv("./Folds5x2_pp.csv", header=0, encoding="gbk")
X = data[['AT', 'V', 'AP', 'RH']]
y = data[['PE']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=10)  # split into training and test sets
svr_Linear = LinearSVR(random_state=0)
svr_Linear.fit(X_train, y_train)
print("SVR_score:", svr_Linear.score(X_train, y_train))
liner = LinearRegression()
liner.fit(X_train, y_train)
print("Linearmodel_score:", liner.score(X_train, y_train))
# in this example you will see that LinearRegression fits better

#try to compare the svc with logisticregression on a same dataset
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
wine_dataset = pd.read_csv(URL, header=None)
wine_dataset.columns = [
    'class label', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10',
    'F11', 'F12', 'F13'
]
X, y = wine_dataset.iloc[:, 1:].values, wine_dataset.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
Example #29
                       random_state=None,
                       tol=0.000001,
                       verbose=0).fit(surgeXC, nU)
coefLinear = linear_svm.coef_

linear_svm_ramp = LinearSVR(C=1,
                            fit_intercept=True,
                            dual=True,
                            epsilon=1e-6,
                            loss='squared_epsilon_insensitive',
                            max_iter=10000,
                            random_state=None,
                            verbose=0).fit(surgeXC, nU_ramp)
coefLinear_ramp = linear_svm_ramp.coef_
print("Train set accuracy of Surge on LinearSVR method: {:.2f}".format(
    linear_svm.score(surgeXC, nU)))

linear_svm1 = LinearSVR(C=1e08,
                        fit_intercept=True,
                        dual=True,
                        epsilon=1e-6,
                        loss='squared_epsilon_insensitive',
                        max_iter=10000,
                        random_state=None,
                        tol=0.000001,
                        verbose=0).fit(swayYC, nV)

coefLinear1 = linear_svm1.coef_
linear_svm1_ramp = LinearSVR(C=1e08,
                             fit_intercept=True,
                             dual=True,
Example #30
X_test = test[features].dropna()
y_test = test[target].dropna()

svr = LinearSVR(random_state=0)
# svr = SVR(kernel='linear', C=1e3)
# svr = SVR(kernel='poly', C=1e3, degree=2)

# train the model on the training set
svr.fit(X_train, y_train)
y_pred = svr.predict(X_test)
plt.scatter(y_test, y_pred, color='blue')
plt.xlabel("Real revenue")
plt.ylabel("Predicted revenue")
plt.show()

svr_score_train = svr.score(X_train, y_train)
svr_score_test = svr.score(X_test, y_test)
print("Training score: ", svr_score_train)
print("Testing score: ", svr_score_test)

# y = movies.revenue.values

# length = 4083
# y = y.reshape(-1, 1)

# x = preprocessing.scale(x)
# y = preprocessing.scale(y)

# regr = linear_model.LinearRegression()
# regr.fit(x,y)
Example #31
from sklearn.svm import LinearSVR

# initialize model
SVR_model = LinearSVR()

# fit model
SVR_model.fit(X_train, y_train)

#predictions: test data
y_pred = SVR_model.predict(X_test)

print('\n\n\nSVM report')

#Scores
print('Train score')
print(SVR_model.score(X_train, y_train))
print('Test score')
print(SVR_model.score(X_test, y_test))
print('-------------------------------------------------------')

# MAE
print('Mean absolute error')
print(mean_absolute_error(y_test, y_pred))
print('-------------------------------------------------------')

# MSE
print('Mean squared error')
print(mean_squared_error(y_test, y_pred))
print('-------------------------------------------------------')

# R-squared
Example #32
X_test = []
Y_test = []
for i in range(0, len(test_data)):
    for j in range(0, len(test_data[i])):
        for k in range(0, len(test_data[i][j]) - 1):
            X_test.append(test_data[i][j][k])
            Y_test.append(labels[test_data[i][j][-1]])
X_test = np.array(X_test)
Y_test = np.array(Y_test)

print(time.asctime(time.localtime(time.time())))
model = LinearSVR(loss='squared_epsilon_insensitive', verbose=1, max_iter=1000)
#model = linear_model.Ridge(max_iter=5000,fit_intercept=True)
model.fit(X_train, Y_train)
print(time.asctime(time.localtime(time.time())))
print(model.score(X_train, Y_train))

save_classifier = open("linear_model", "wb")
pickle.dump(model, save_classifier)
save_classifier.close()

f1 = open('truthMean.txt', 'w')
f2 = open('linear_predict.txt', 'w')
for i in Y_test:
    f1.write(str(i))
    f1.write('\n')
for i in list(model.predict(X_test)):
    tmp = [abs(1 - i), abs(0.6666667 - i), abs(0.33333334 - i), abs(0 - i)]
    x = tmp.index(min(tmp))
    if x == 0:
        x = 1
Example #33
print('LinearSVC config:')
print(lsvc.get_params())
lsvc.fit(smr_train.feature_matrix, smr_train.labels)
lsvc_score_train = lsvc.score(smr_train.feature_matrix, smr_train.labels)
print('LinearSVC accuracy train: {}'.format(lsvc_score_train))
lsvc_score_test = lsvc.score(smr_test.feature_matrix, smr_test.labels)
print('LinearSVC accuracy test: {}'.format(lsvc_score_test))
print('')

lsvr = LinearSVR()
print('LinearSVR config:')
print(lsvr.get_params())
lsvr.fit(smr_train.feature_matrix, smr_train.labels)
lsvr_score_train = lsvr.score(smr_train.feature_matrix, smr_train.labels)
print('LinearSVR R^2 train: {}'.format(lsvr_score_train))
lsvr_score_test = lsvr.score(smr_test.feature_matrix, smr_test.labels)
print('LinearSVR R^2 test: {}'.format(lsvr_score_test))
print('')

nusvc = NuSVC()
print('NuSVC config:')
print(nusvc.get_params())
nusvc.fit(smr_train.feature_matrix, smr_train.labels)
nusvc_score_train = nusvc.score(smr_train.feature_matrix, smr_train.labels)
print('NuSVC accuracy train: {}'.format(nusvc_score_train))
nusvc_score_test = nusvc.score(smr_test.feature_matrix, smr_test.labels)
print('NuSVC accuracy test: {}'.format(nusvc_score_test))
print('')

nusvr = NuSVR()
print('NuSVR config:')