示例#1
0
def svm_cross_validate_category(X, y, category, C, penalty, sample_weights):
    """Run a two-fold cross-validation and return the mean ROC AUC per category.

    Two ``SGDRegressor`` models are trained, one on each fold, and each one is
    evaluated on the fold it was not trained on.  A ROC curve is computed
    separately for every distinct value found in ``category``.

    The function relies on the module-level names ``loss``, ``epsilon``,
    ``N`` and ``generate_cv_indices``.

    :param X: feature matrix, indexed as ``X[mask, :]``.
    :param y: target vector; the value 1 is used as the positive label.
    :param category: per-sample category labels.
    :param C: value passed to ``SGDRegressor`` as ``alpha``.
    :param penalty: penalty name passed to ``SGDRegressor``.
    :param sample_weights: per-sample weights used when fitting.
    :return: array with one entry per unique category, the AUC averaged over
        both folds (a fold without samples of a category contributes 0).
    """

    def _new_model():
        # Both folds use identically configured regressors.
        return SGDRegressor(loss=loss,
                            penalty=penalty,
                            epsilon=epsilon,
                            alpha=C,
                            shuffle=True)

    clf_svm_1 = _new_model()
    clf_svm_2 = _new_model()

    cv_indices = generate_cv_indices(category)

    # The two halves are combined with ``&`` below, i.e. used as boolean masks.
    train_ids = cv_indices[0:N]
    test_ids = cv_indices[N:2 * N]

    clf_svm_1.fit(X[train_ids, :],
                  y[train_ids],
                  sample_weight=sample_weights[train_ids])
    clf_svm_2.fit(X[test_ids, :],
                  y[test_ids],
                  sample_weight=sample_weights[test_ids])

    # ``predict`` is the regressor API for continuous outputs;
    # ``decision_function`` is not available on regressors in current
    # scikit-learn releases.
    y_1 = clf_svm_1.predict(X[test_ids, :])
    y_2 = clf_svm_2.predict(X[train_ids, :])

    u, indices = np.unique(category, return_inverse=True)
    auc = np.zeros((2, len(u)))
    for i in range(len(u)):
        i_inds = indices == i

        # Fold 1: model trained on ``train_ids`` scored on ``test_ids``.
        if np.sum(test_ids & i_inds) != 0:
            fpr, tpr, _ = metrics.roc_curve(y[test_ids & i_inds],
                                            y_1[i_inds[test_ids]],
                                            pos_label=1)
            auc[0, i] = metrics.auc(fpr, tpr)

        # Fold 2: model trained on ``test_ids`` scored on ``train_ids``.
        if np.sum(train_ids & i_inds) != 0:
            fpr, tpr, _ = metrics.roc_curve(y[train_ids & i_inds],
                                            y_2[i_inds[train_ids]],
                                            pos_label=1)
            auc[1, i] = metrics.auc(fpr, tpr)

    # Computed once after the loop so it is defined even with no categories.
    mean_auc = np.mean(auc, axis=0)
    print("Finished running category cross-validation")
    return mean_auc
示例#2
0
def svm_cross_validate(X, y, category, C, penalty, sample_weights):
    """Run a two-fold cross-validation and return the mean ROC AUC.

    Two ``SGDRegressor`` models are trained, one on each fold, and each one is
    evaluated on the fold it was not trained on.

    The function relies on the module-level names ``loss``, ``epsilon``,
    ``N`` and ``generate_cv_indices_unbalanced``.

    :param X: feature matrix, indexed as ``X[ids, :]``.
    :param y: target vector; the value 1 is used as the positive label.
    :param category: per-sample category labels, used to build the folds.
    :param C: value passed to ``SGDRegressor`` as ``alpha``.
    :param penalty: penalty name passed to ``SGDRegressor``.
    :param sample_weights: per-sample weights used when fitting.
    :return: the AUC averaged over the two folds.
    """

    def _new_model():
        # Both folds use identically configured regressors.
        return SGDRegressor(loss=loss,
                            penalty=penalty,
                            epsilon=epsilon,
                            alpha=C,
                            shuffle=True)

    clf_svm_1 = _new_model()
    clf_svm_2 = _new_model()

    cv_indices = generate_cv_indices_unbalanced(category)

    train_ids = cv_indices[0:N]
    test_ids = cv_indices[N:2 * N]

    clf_svm_1.fit(X[train_ids, :],
                  y[train_ids],
                  sample_weight=sample_weights[train_ids])
    clf_svm_2.fit(X[test_ids, :],
                  y[test_ids],
                  sample_weight=sample_weights[test_ids])

    # ``predict`` is the regressor API for continuous outputs;
    # ``decision_function`` is not available on regressors in current
    # scikit-learn releases.
    y_1 = clf_svm_1.predict(X[test_ids, :])
    y_2 = clf_svm_2.predict(X[train_ids, :])

    auc = np.zeros(2)
    fpr, tpr, _ = metrics.roc_curve(y[test_ids], y_1, pos_label=1)
    auc[0] = metrics.auc(fpr, tpr)

    fpr, tpr, _ = metrics.roc_curve(y[train_ids], y_2, pos_label=1)
    auc[1] = metrics.auc(fpr, tpr)

    mean_auc = np.mean(auc, axis=0)
    print("Finished running standard cross validation")
    return mean_auc
示例#3
0
class RBFSamplerSGDRegressorEstimator(BaseEstimator, TransformerMixin):
    """Chain an ``RBFSampler`` feature map with an ``SGDRegressor``.

    Input features are mapped through the sampler, rescaled by
    ``sqrt(n_components) / sqrt(2)`` and then fed to the regressor.
    """

    def __init__(self,
                 gamma=1.0,
                 n_components=100,
                 random_state=None,
                 **kwargs):
        """Build both stages.

        :param gamma: forwarded to ``RBFSampler``.
        :param n_components: forwarded to ``RBFSampler``.
        :param random_state: shared by the sampler and the regressor.
        :param kwargs: any further keyword arguments for ``SGDRegressor``.
        """
        kwargs['random_state'] = random_state
        self.rbf_sampler = RBFSampler(gamma=gamma,
                                      n_components=n_components,
                                      random_state=random_state)
        self.sgdregressor = SGDRegressor(**kwargs)

    def fit(self, X, y):
        """Fit the sampler on ``X`` and the regressor on the mapped features."""
        self.rbf_sampler.fit(X)
        # Train on exactly the representation that predict()/score() use;
        # fitting on the unscaled sampler output would not match the scaled
        # features seen at prediction time.
        self.sgdregressor.fit(self.transform(X), y)
        return self

    def transform(self, X, y=None):
        """Return the rescaled random-feature representation of ``X``."""
        scale = np.sqrt(self.rbf_sampler.n_components) / np.sqrt(2.)
        return scale * self.rbf_sampler.transform(X)

    def predict(self, X):
        """Predict targets for ``X`` with the fitted regressor."""
        return self.sgdregressor.predict(self.transform(X))

    def score(self, X, y):
        """Return the regressor's ``score`` on the mapped ``X`` and ``y``."""
        return self.sgdregressor.score(self.transform(X), y)
示例#4
0
def linear_model2():
    """
    Fit a linear model on the Boston housing data with gradient descent.

    Prints the learned intercept, the test-set predictions, the model score
    and the mean squared error.

    :return: None
    """
    # 1. Load the data
    boston = load_boston()
    # 2. Split into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        test_size=0.2)
    # 3. Feature engineering -- standardisation
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    # Reuse the statistics learned on the training set; re-fitting on the
    # test set would scale the two sets differently.
    x_test = transfer.transform(x_test)
    # 4. Train the estimator (linear regression via SGD)
    estimator = SGDRegressor(max_iter=1000,
                             learning_rate="constant",
                             eta0=0.001)
    estimator.fit(x_train, y_train)
    print("这个模型的偏置是:\n", estimator.intercept_)
    # 5. Evaluate the model
    # 5.1 Predictions and score
    y_pre = estimator.predict(x_test)
    print("预测值是:\n", y_pre)
    score = estimator.score(x_test, y_test)
    print("准确率是:\n", score)
    # 5.2 Mean squared error
    ret = mean_squared_error(y_test, y_pre)
    print("均方误差是:\n", ret)
示例#5
0
class support_vector_machine:
    """Small wrapper that owns an ``SGDRegressor`` and persists it with joblib."""

    # Class-level default so the attribute exists even before __init__ runs.
    _model = None

    def __init__(self):
        self._model = SGDRegressor()

    def train(self, data_x, data_y):
        """Fit the wrapped model and dump it to ``svm_model.pickle``."""
        estimator = self._model
        estimator.fit(data_x, data_y)
        joblib.dump(estimator, 'svm_model.pickle')

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        return self._model.predict(X)

    def score(self, X, y):
        """Return the wrapped model's score on ``X`` / ``y``."""
        return self._model.score(X, y)

    def load_model(self, path):
        """Load a model stored at ``path`` relative to this module's directory.

        The resolved path is printed, the loaded object replaces the wrapped
        model and is returned.
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(module_dir, path)
        print(path)
        loaded = joblib.load(path)
        self._model = loaded
        return loaded

    def get_model(self):
        """Return the currently wrapped model."""
        return self._model
示例#6
0
def runSGD(X_train, X_test, y_train, y_test, dataname):
    """Train one epsilon-insensitive ``SGDRegressor`` per epsilon and keep the best.

    For each epsilon a scatter plot of actual vs. predicted values is saved to
    a PNG file named after the current time and the epsilon.

    :param X_train: training features.
    :param X_test: test features.
    :param y_train: training targets.
    :param y_test: test targets (must provide ``min()`` / ``max()``).
    :param dataname: label used in the plot title.
    :return: the regressor with the highest test score, or ``None`` when no
        model scores above 0.
    """
    all_epsilon = [0.001, 0.1, 0.5, 0.9]
    best_model = None
    max_score = 0
    for epsilon in all_epsilon:
        regressor = SGDRegressor(loss='epsilon_insensitive', epsilon=epsilon)
        regressor.fit(X_train, y_train)

        y_pred = regressor.predict(X_test)
        plt.scatter(y_test, y_pred)
        plt.plot([y_test.min(), y_test.max()],
                 [y_pred.min(), y_pred.max()],
                 'r',
                 lw=2)
        score = regressor.score(X_test, y_test)
        if score > max_score:
            # Remember the score as well; otherwise every later model that
            # merely beats the initial threshold would replace the best one.
            max_score = score
            best_model = regressor
        plt.title('SGD - {0}\n epsilon ={1} \nScore = {2:.3f} '.format(
            str(dataname), epsilon, score))
        plt.xlabel('Actual ')
        plt.ylabel('Predict')
        plt.savefig('runSGD_{}_{}.png'.format(strftime("%H_%M_%S", gmtime()),
                                              epsilon))
        plt.close()
    return best_model
def sgd(X, y, weight, X_test=False):
    """Fit a weighted Huber-loss ``SGDRegressor`` and persist model and predictions.

    The features are scaled with ``StandardScaler(with_mean=False)`` fitted on
    the training data only.  The training score is printed, the model is
    dumped to ``models/sgd_.pkl`` and, when test data is supplied, its
    predictions are written to ``predict_y_forward.mat`` under the key ``y``.

    NOTE(review): ``n_iter`` and ``sklearn.externals.joblib`` only exist in
    older scikit-learn releases -- confirm the targeted version.

    :param X: training features.
    :param y: training targets.
    :param weight: per-sample weights for fitting and scoring.
    :param X_test: optional test features; when left at ``False`` (or
        ``None``) no predictions are made or saved.
    :return: None
    """
    from sklearn.linear_model import SGDRegressor
    from sklearn.preprocessing import StandardScaler
    from sklearn.externals import joblib
    import scipy.io as sio

    clf = SGDRegressor(loss="huber", n_iter=100, penalty="l1")

    scaler = StandardScaler(with_mean=False)
    # Fit the scaler on the training data only.
    X_train = scaler.fit_transform(X)

    # The default ``False`` means "no test set"; transforming it would raise.
    has_test = X_test is not False and X_test is not None
    if has_test:
        X_test = scaler.transform(X_test)  # same transformation as training

    clf.fit(X_train, y, sample_weight=weight)

    print(clf.score(X_train, y, weight))

    y_pred = clf.predict(X_test) if has_test else None

    joblib.dump(clf, 'models/sgd_.pkl')
    if has_test:
        sio.savemat('predict_y_forward.mat', {'y': y_pred})
示例#8
0
    def predict(self, df):
        """Fit an ``SGDRegressor`` and print its RMSE on the validation split.

        The data frame is copied, split via ``self.partition`` and scaled via
        ``self.feature_scale`` before the model is trained on the training
        split.

        :param df: input data; must provide ``copy()``.
        :return: None (the validation RMSE is printed).
        """
        from sklearn.linear_model import SGDRegressor
        from sklearn.metrics import mean_squared_error

        # Work on a copy so the caller's data is not handed to partition().
        df_copy = df.copy()

        # partition data
        X_train, y_train, X_val, y_val, X_test, y_test = self.partition(df_copy)

        # normalize features
        X_train_std, X_val_std, X_test_std = self.feature_scale(X_train, X_val, X_test)

        # linear regressor trained with stochastic gradient descent
        lr = SGDRegressor()
        lr.fit(X_train_std, y_train)

        # predictions on validation set
        predictions = lr.predict(X_val_std)

        # root mean squared error on the validation set
        val_rmse = mean_squared_error(y_val, predictions) ** .5
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(val_rmse)
示例#9
0
    def predict(self, df):
        """Train an ``SGDRegressor`` and print the validation-set RMSE.

        Steps: copy ``df``, split it with ``self.partition``, scale the
        feature sets with ``self.feature_scale``, fit on the training split
        and evaluate on the validation split.

        :param df: input data; must provide ``copy()``.
        :return: None (the validation RMSE is printed).
        """
        from sklearn.linear_model import SGDRegressor
        from sklearn.metrics import mean_squared_error

        # copy of data
        df_copy = df.copy()

        # partition data
        X_train, y_train, X_val, y_val, X_test, y_test = self.partition(
            df_copy)

        # normalize features
        X_train_std, X_val_std, X_test_std = self.feature_scale(
            X_train, X_val, X_test)

        # linear regressor trained with stochastic gradient descent
        lr = SGDRegressor()
        lr.fit(X_train_std, y_train)

        # predictions on validation set
        predictions = lr.predict(X_val_std)

        # root mean squared error on the validation set
        val_rmse = mean_squared_error(y_val, predictions) ** .5
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(val_rmse)
示例#10
0
def test_call_fit_with_arguments_score_does_not_accept():
    """Autologging must not forward fit-only arguments to ``score``.

    ``SGDRegressor.fit`` accepts ``intercept_init`` while ``score`` does not.
    ``score`` is temporarily replaced by a recording stub with the same
    signature, and the test checks that it is called with ``(X, y, None)``
    during ``fit`` and that the run's params, metrics, tags and artifacts
    match the model.
    """
    mlflow.sklearn.autolog()

    from sklearn.linear_model import SGDRegressor

    assert "intercept_init" in _get_arg_names(SGDRegressor.fit)
    assert "intercept_init" not in _get_arg_names(SGDRegressor.score)

    mock_obj = mock.Mock()

    def mock_score(self, X, y, sample_weight=None):  # pylint: disable=unused-argument
        mock_obj(X, y, sample_weight)
        return 0

    assert inspect.signature(
        SGDRegressor.score) == inspect.signature(mock_score)

    # patch.object restores the real ``score`` on exit, so the replacement
    # cannot leak into other tests that use SGDRegressor.
    with mock.patch.object(SGDRegressor, "score", mock_score):
        model = SGDRegressor()
        X, y = get_iris()

        with mlflow.start_run() as run:
            model.fit(X, y, intercept_init=0)
            mock_obj.assert_called_once_with(X, y, None)

        run_id = run.info.run_id
        params, metrics, tags, artifacts = get_run_data(run_id)
        assert params == truncate_dict(
            stringify_dict_values(model.get_params(deep=True)))
        # Still inside the patch: the expected training score is the stub's.
        assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
        assert tags == get_expected_class_tags(model)
        assert MODEL_DIR in artifacts
        assert_predict_equal(load_model_by_run_id(run_id), model, X)
def SGD(x,
        y,
        test_x,
        test_y,
        loss="squared_loss",
        penalty="l1",
        alpha=0.0001,
        tol=0.001,
        random_state=1,
        eta0=0.01,
        learning_rate='optimal',
        power_t=0.25,
        max_iter=1000):
    """Train an ``SGDRegressor`` with ``partial_fit`` and score it as a binary classifier.

    Predictions on ``test_x`` are thresholded at 0.5 into 0/1 labels before
    the classification metrics are computed.

    NOTE(review): the model is trained with a single ``partial_fit`` call --
    confirm that ``max_iter`` / ``tol`` are meant to apply here.

    :return: tuple ``(score, f1, precision, recall, tn, fp, fn, tp, n_iter)``
        where ``score`` is the regressor's own ``score`` on the test data and
        ``n_iter`` is the regressor's ``n_iter_`` attribute.
    """
    hyper_params = dict(loss=loss,
                        penalty=penalty,
                        alpha=alpha,
                        tol=tol,
                        random_state=random_state,
                        eta0=eta0,
                        learning_rate=learning_rate,
                        power_t=power_t,
                        max_iter=max_iter)
    regressor = SGDRegressor(**hyper_params)
    regressor.partial_fit(x, y)

    # Binarise the continuous outputs in place: > 0.5 -> 1, otherwise 0.
    labels = regressor.predict(test_x)
    labels[labels > 0.5] = 1
    labels[labels <= 0.5] = 0

    n_iterations = regressor.n_iter_
    model_score = regressor.score(test_x, test_y)
    f1 = f1_score(test_y, labels)
    precision = precision_score(test_y, labels)
    recall = recall_score(test_y, labels)

    tn, fp, fn, tp = confusion_matrix(test_y, labels).ravel()
    return model_score, f1, precision, recall, tn, fp, fn, tp, n_iterations
def SGDTrain():
    """Train an ``SGDRegressor`` batch by batch and plot its test predictions.

    Uses the module-level ``x_train_standard``, ``y_train``,
    ``x_test_standard``, ``y_test``, ``epochs`` and ``get_batch``.  One batch
    is drawn per iteration and the test score is recorded after each update.

    :return: tuple ``(steps, scores)`` -- the iteration indices and the test
        score measured after each ``partial_fit`` call.
    """
    regressor = SGDRegressor()
    batches = get_batch(x_train_standard, y_train)
    steps = []
    test_scores = []
    for step in range(epochs):
        batch_x, batch_y = next(batches)
        regressor.partial_fit(batch_x, batch_y)
        steps.append(step)
        test_scores.append(regressor.score(x_test_standard, y_test))

    y_hat = regressor.predict(x_test_standard)

    # Scatter of predictions against the truth, plus the identity line.
    plt.title('SGD result (4000 epochs)')
    plt.scatter(y_test, y_hat, color='y', marker='o')
    plt.plot(y_test, y_test, color='g')
    plt.xlabel('True value')
    plt.ylabel('Predicted value')
    plt.show()

    rmse = np.sqrt(mean_squared_error(y_test, y_hat))
    print('SGD RMSE为:', rmse)
    return steps, test_scores
示例#13
0
def mylinear():
    '''
    Predict Boston house prices with three linear models.

    Fits ``LinearRegression``, ``SGDRegressor`` and ``Ridge`` on standardised
    features and targets, and prints each model's coefficients, de-normalised
    predictions, score and mean squared error.

    :return: None
    '''

    # Load the data
    lb = load_boston()
    # Split into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(lb.data,
                                                        lb.target,
                                                        test_size=0.25)
    # Standardise features and targets with two separate scalers
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)

    std_y = StandardScaler()
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))
    # Test targets back on the original price scale, reused for every MSE.
    y_test_price = std_y.inverse_transform(y_test)

    # Normal-equation solution
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    print(lr.coef_)

    y_lr_preidct = std_y.inverse_transform(lr.predict(x_test))
    print("正规方程测试集里面每个房子的价格:", y_lr_preidct)
    print("分数:", lr.score(x_test, y_test))
    print("正规方程的均方误差:",
          mean_squared_error(y_test_price, y_lr_preidct))

    # Gradient-descent solution
    sgd = SGDRegressor()
    # SGDRegressor expects a 1-D target.
    sgd.fit(x_train, y_train.ravel())
    print(sgd.coef_)

    # The scaler needs a 2-D column; flatten again afterwards.
    y_sgd_preidct = std_y.inverse_transform(
        sgd.predict(x_test).reshape(-1, 1)).ravel()
    print("梯度下降测试集里面每个房子的价格:", y_sgd_preidct)
    print("分数:", sgd.score(x_test, y_test))
    print("梯度下降的均方误差:",
          mean_squared_error(y_test_price, y_sgd_preidct))

    # Ridge-regression solution
    rd = Ridge()
    rd.fit(x_train, y_train)
    print(rd.coef_)

    y_rd_preidct = std_y.inverse_transform(rd.predict(x_test))
    # These labels used to say "gradient descent" although they report Ridge.
    print("岭回归测试集里面每个房子的价格:", y_rd_preidct)
    print("分数:", rd.score(x_test, y_test))
    print("岭回归的均方误差:",
          mean_squared_error(y_test_price, y_rd_preidct))
示例#14
0
def linear():
    """Compare ``LinearRegression``, ``SGDRegressor`` and ``Ridge`` on Boston housing.

    Features and targets are standardised.  For each model the test score
    and the mean squared error on the original target scale are printed; the
    coefficients are printed for SGD and Ridge.
    """
    # Load and split the data
    lb = load_boston()
    x_train, x_test, y_train, y_test = train_test_split(lb.data, lb.target, test_size=0.25, random_state=24)

    # Standardise the features
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # The scaler needs 2-D input, so the targets become single columns
    std_y = StandardScaler()
    y_train = std_y.fit_transform(np.asarray(y_train).reshape(-1, 1))
    y_test = std_y.transform(np.asarray(y_test).reshape(-1, 1))
    # Test targets back on the original scale, reused for every MSE
    y_test_original = std_y.inverse_transform(y_test)

    print('--------正规方程--------')
    # Linear regression solved with the normal equation
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    print("测试集准确率:", lr.score(x_test, y_test))

    # Undo the target scaling before computing the error
    lr_predict = std_y.inverse_transform(lr.predict(x_test))
    print("正规方程的均方误差:", mean_squared_error(y_test_original, lr_predict))

    print('--------SGD梯度下降--------')
    # Stochastic gradient descent; SGDRegressor expects a 1-D target
    sgd = SGDRegressor()
    sgd.fit(x_train, y_train.ravel())
    print("测试集准确率:", sgd.score(x_test, y_test))
    print("参数/系数:", sgd.coef_)
    # predict() returns a 1-D array, but the scaler needs a 2-D column
    sgd_predict = std_y.inverse_transform(sgd.predict(x_test).reshape(-1, 1))
    print("梯度下降的均方误差:", mean_squared_error(y_test_original, sgd_predict))

    print('--------岭回归分析--------')
    # Ridge regression
    r = Ridge(alpha=3.0)
    r.fit(x_train, y_train)
    print("测试集准确率:", r.score(x_test, y_test))
    print("参数/系数:", r.coef_)
    r_predict = std_y.inverse_transform(r.predict(x_test))
    print("岭回归的均方误差:", mean_squared_error(y_test_original, r_predict))
示例#15
0
def trainModel(ModelType, X, y):
	"""Instantiate ``ModelType``, fit it on ``X`` / ``y`` and return it.

	``SGDRegressor`` is built with the epsilon-insensitive loss and 100
	iterations; any other type is built with its default constructor.  The
	score on the training data is printed.
	"""
	wants_sgd = ModelType == SGDRegressor
	model = SGDRegressor(loss='epsilon_insensitive', max_iter=100) if wants_sgd else ModelType()

	model.fit(X, y)
	training_score = model.score(X, y)
	print("Model training score: {}".format(training_score))
	return model
示例#16
0
def run():
    """Train an ``SGDRegressor`` on the training data and write test predictions.

    Data comes from ``readTrainingData`` / ``readTestingData``.  The features
    are standardised with statistics learned on the training set, the
    training score is printed and the test predictions are passed to
    ``write_to_file``.

    NOTE(review): ``n_iter`` is only accepted by older scikit-learn
    releases -- confirm the targeted version.
    """
    X_train, Y_train = readTrainingData()
    X_test = readTestingData()

    scaler = StandardScaler()
    # Fit and transform in one step instead of discarding the first result.
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    clf = SGDRegressor(n_iter=100)
    clf.fit(X_train, Y_train)
    # Parenthesised form prints identically on Python 2 and Python 3.
    print(clf.score(X_train, Y_train))

    write_to_file(clf.predict(X_test))
def linear_regression(features, values):
    """
    Fit an ``SGDRegressor`` (100 iterations) on ``features`` / ``values``.

    The training score is printed.

    Returns the tuple ``(intercept, params)`` taken from the fitted model's
    ``intercept_`` and ``coef_`` attributes.
    """
    regressor = SGDRegressor(n_iter=100)
    regressor.fit(features, values)

    training_score = regressor.score(features, values)
    print(training_score)

    return regressor.intercept_, regressor.coef_
示例#18
0
def train(training_pandas_data, test_pandas_data, label_col,
          feat_cols, alpha, l1_ratio, max_iter, tol, training_data_path, test_data_path):
    """Train an ``SGDRegressor`` and log its metrics and model to MLflow.

    The run configuration is echoed to stdout, the model is fitted on the
    training frame and evaluated on both frames.  Test RMSE, training R2 and
    test R2 are printed and logged, and the model is stored as the artifact
    ``"model"``.

    :param training_pandas_data: frame holding the training rows.
    :param test_pandas_data: frame holding the test rows.
    :param label_col: name of the target column.
    :param feat_cols: names of the feature columns.
    :param alpha, l1_ratio, max_iter, tol: forwarded to ``SGDRegressor``.
    :param training_data_path, test_data_path: only printed.
    """
    # Echo the configuration.
    print("train:         " + training_data_path)
    print("test:          " + test_data_path)
    for caption, value in (("alpha:        ", alpha),
                           ("l1-ratio:     ", l1_ratio),
                           ("max_iter:     ", max_iter),
                           ("tol:     ", tol)):
        print(caption, value)
    print("label-col:     " + label_col)
    for feature_name in feat_cols:
        print("feat-cols:     " + feature_name)

    # Separate labels and features for both data sets.
    train_y = training_pandas_data[label_col].values
    train_X = training_pandas_data[feat_cols].values

    test_y = test_pandas_data[label_col].values
    test_X = test_pandas_data[feat_cols].values

    # Build and fit the SGD model.
    en = SGDRegressor(alpha=alpha, l1_ratio=l1_ratio, warm_start=True, max_iter=max_iter, tol=tol)
    en.fit(train_X, train_y)

    # Evaluate.
    test_rmse = mean_squared_error(test_y, en.predict(test_X)) ** 0.5
    r2_score_training = en.score(train_X, train_y)
    r2_score_test = en.score(test_X, test_y)
    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Log the metrics.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)

    # Store the fitted model as an artifact.
    sklearn.log_model(en, "model")
示例#19
0
def SGDRegressor_test():
    """Fit an ``SGDRegressor`` on the module-level data and print its metrics.

    Uses the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``ss_y``.  The model score and R-squared are reported on the values
    as given; MAE and MSE are reported after ``ss_y.inverse_transform``.
    """
    sgdr = SGDRegressor(max_iter=1000)
    sgdr.fit(X_train, y_train.ravel())
    sgdr_y_predict = sgdr.predict(X_test)
    print("快速的随机梯度下降模型评价:{}".format(sgdr.score(X_test, y_test)))
    print("使用R-squared评价标准:{}".format(r2_score(y_test, sgdr_y_predict)))

    # predict() returns a 1-D array; the scaler is given a 2-D column.
    # NOTE(review): assumes ``ss_y`` was fitted on a single target column.
    y_test_original = ss_y.inverse_transform(y_test)
    y_predict_original = ss_y.inverse_transform(sgdr_y_predict.reshape(-1, 1))
    print("使用MAE评价标准:{}".format(
        mean_absolute_error(y_test_original, y_predict_original)))
    print("使用MSE评价标准:{}".format(
        mean_squared_error(y_test_original, y_predict_original)))
def sgdregressor_sk(x_train, y_train, x_test, y_test, epochs):
    """Fit an ``SGDRegressor`` and evaluate it on the test data.

    The regressor uses ``eta0=2``, ``power_t=0.3`` and ``max_iter=epochs``.

    :return: tuple ``(y_pred, acc)`` -- the test predictions and the
        regressor's ``score`` on the test data.
    """
    model = SGDRegressor(eta0=2, power_t=0.3, max_iter=epochs)
    model.fit(x_train, y_train)

    predictions = model.predict(x_test)
    test_score = model.score(x_test, y_test)
    return predictions, test_score
def linear_regression(features, values):
    """
    Perform linear regression given a data set with an arbitrary number of features.

    An ``SGDRegressor`` with 100 iterations is fitted and its training score
    is printed.

    Returns the tuple ``(intercept, params)`` taken from the fitted model's
    ``intercept_`` and ``coef_`` attributes.
    """
    model = SGDRegressor(n_iter=100)
    model.fit(features, values)
    # A single pre-formatted string prints the same text on Python 2 and
    # Python 3 (two spaces, as the original print statement produced).
    print('SCORE:  %s' % (model.score(features, values),))
    intercept = model.intercept_
    params = model.coef_

    return intercept, params
示例#22
0
def lineaReg():
    """Compare ``LinearRegression`` and ``SGDRegressor`` on the Boston data.

    Features and targets are standardised.  For each model the default
    score, the R-squared, and the MSE / MAE on the original target scale are
    printed.

    :return: None
    """
    boston = loadDataSet()
    X = boston.data
    y = boston.target.reshape((len(boston.target), 1))

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=33)
    print('The max target value is: ', np.max(boston.target))
    print('The min target value is: ', np.min(boston.target))
    print('The average target value is: ', np.mean(boston.target))

    ss_x = StandardScaler()
    ss_y = StandardScaler()
    X_train = ss_x.fit_transform(X_train)
    X_test = ss_x.transform(X_test)
    y_train = ss_y.fit_transform(y_train)
    y_test = ss_y.transform(y_test)

    lr = LinearRegression()
    lr.fit(X_train, y_train)
    lr_y_predict = lr.predict(X_test)

    sgdr = SGDRegressor()
    # SGDRegressor expects a 1-D target, so the column vector is flattened.
    sgdr.fit(X_train, y_train.ravel())
    sgdr_y_predcit = sgdr.predict(X_test)

    # Values on the original target scale.  The SGD prediction is 1-D and is
    # reshaped to a column because the scaler needs 2-D input.
    y_test_original = ss_y.inverse_transform(y_test)
    lr_original = ss_y.inverse_transform(lr_y_predict)
    sgdr_original = ss_y.inverse_transform(sgdr_y_predcit.reshape(-1, 1))

    # Evaluation metrics for the regression problem
    print('dafault value of LR: ', lr.score(X_test, y_test))
    print('R-squared of LR: ', r2_score(y_test, lr_y_predict))
    print(
        'Mean squared error of LR: ',
        mean_squared_error(y_test_original, lr_original))
    print(
        'Mean absoluate of LR: ',
        mean_absolute_error(y_test_original, lr_original))

    print('------------------------------------------------------------------')
    print('dafault value of SGDR: ', sgdr.score(X_test, y_test))
    print('R-squared of SGDR: ', r2_score(y_test, sgdr_y_predcit))
    print(
        'Mean squared error of SGDR: ',
        mean_squared_error(y_test_original, sgdr_original))
    print(
        'Mean absoluate of SGDR: ',
        mean_absolute_error(y_test_original, sgdr_original))
    return None
def sgd(pd, pl, qd, ql):
    """Fit a default ``SGDRegressor`` on ``pd`` / ``pl`` and print its score on ``qd`` / ``ql``.

    A hyper-parameter search space is built but currently not used, because
    the randomized search is disabled.

    :return: the fitted regressor.
    """
    # Search space kept for the disabled randomized search.
    params = dict(
        loss=['squared_loss', 'huber', 'epsilon_insensitive',
              'squared_epsilon_insensitive'],
        alpha=expon(scale=1),
        epsilon=expon(scale=1),
        l1_ratio=uniform(),
        penalty=['l2', 'l1', 'elasticnet'],
    )
    clf = SGDRegressor()
    print("Training Linear SVM Randomly")
    clf.fit(pd, pl)
    held_out_score = clf.score(qd, ql)
    print("Score: " + str(held_out_score))
    return clf
示例#24
0
    def sgd_regression(self, x, y, prediction_set):
        '''Fit an ``SGDRegressor`` (``max_iter=1000``, ``tol=1e-3``) on ``x`` / ``y``.

        :param x: training features.
        :param y: training targets.
        :param prediction_set: data to predict for; only used when it is a
            ``numpy.ndarray``.
        :return: ``[training_score, predictions]`` when ``prediction_set`` is
            an ndarray, otherwise ``[training_score]``.
        '''
        regr = SGDRegressor(max_iter=1000, tol=1e-3)
        regr.fit(x, y)

        result = [regr.score(x, y)]
        if isinstance(prediction_set, np.ndarray):
            # Predict once; the earlier version predicted twice and threw
            # the first result away.
            result.append(regr.predict(prediction_set))
        return result
示例#25
0
def player_prediction(name):
    """Predict midfielder points with an ``SGDRegressor`` and print one player's rows.

    Reads ``../resources/newMERGED.csv``, keeps the midfielders sorted by
    season and round, bins ``round`` into three labelled intervals, trains on
    the leading rows and predicts the trailing rows.  The test rows of the
    player called ``name`` are printed together with the prediction and the
    prediction error.

    NOTE(review): training uses rows ``[:7958]`` and testing ``[7959:]``, so
    row 7958 is in neither set -- confirm whether that gap is intended.

    :param name: value of the ``name`` column to print results for.
    :return: None
    """
    data = pd.read_csv('../resources/newMERGED.csv',
                       sep=',',
                       encoding='utf-8',
                       index_col=0)
    model = data[[
        'player_id', 'name', 'season', 'pos', 'round', 'team_rank',
        'opponent_team_rank', 'team_pot', 'opp_pot', 'concede_pot',
        'opp_concede_pot', 'prev_points', 'form_points', 'total_points',
        'long_form', 'ict_form'
    ]]

    # Non in-place operations avoid mutating a slice of ``model``.
    MidfielderModal = (model.loc[model['pos'] == 'Midfielder']
                       .drop('pos', axis=1)
                       .sort_values(['season', 'round'], ascending=True))
    players = MidfielderModal[7959:]

    # Replace the round number by one of three equal-width bins.
    MidfielderModal['round'] = pd.cut(MidfielderModal['round'],
                                      3,
                                      labels=[1, 2, 3])

    X = MidfielderModal.drop(['total_points', 'season', 'player_id', 'name'],
                             axis=1)
    y = MidfielderModal[['total_points']]

    X_train = X[:7958]
    X_test = X[7959:]
    y_train = y[:7958]
    y_test = y[7959:]

    regression_model = SGDRegressor()
    # SGDRegressor expects a 1-D target rather than a one-column frame.
    regression_model.fit(X_train, y_train.values.ravel())

    y_pred = regression_model.predict(X_test)

    # ``axis`` must be passed by keyword; pandas 2.x rejects it positionally.
    testing = pd.concat([X_test, y_test], axis=1)
    testing['Predicted'] = np.round(y_pred, 1)
    testing[
        'Prediction_Error'] = testing['total_points'] - testing['Predicted']
    testing['player_id'] = players.player_id
    testing['name'] = players.name

    print(testing[testing['name'] == name])
def SGD(x_train,x_test,y_train,y_test):
    """Fit a default ``SGDRegressor`` (gradient descent) and report on the test data.

    The test predictions and the test score are printed.

    :return: tuple ``(k, b)`` -- the fitted ``coef_`` and ``intercept_``.
    """
    regressor = SGDRegressor()
    regressor.fit(x_train, y_train)

    test_predictions = regressor.predict(x_test)
    test_score = regressor.score(x_test, y_test)
    print(test_predictions)
    print(test_score)

    return regressor.coef_, regressor.intercept_
示例#27
0
    def runSGDRegressor(self):
        """Fit an ``SGDRegressor`` on the stored training data and display the results.

        Reads ``m_X_train``, ``m_y_train``, ``m_X_test`` and ``m_y_test`` from
        the instance, then hands the predictions and the test score to the
        instance's plotting and reporting helpers.
        """
        sgd_options = dict(loss='squared_loss',
                           penalty='l2',
                           fit_intercept=True)
        lm = SGDRegressor(**sgd_options)

        print("SGDRegressor\n")
        lm.fit(self.m_X_train, self.m_y_train)

        test_predictions = lm.predict(self.m_X_test)
        test_score = lm.score(self.m_X_test, self.m_y_test)
        train_predictions = lm.predict(self.m_X_train)

        self.displayPredictPlot(test_predictions)
        self.displayResidualPlot(test_predictions, train_predictions)
        self.dispalyModelResult(lm, test_predictions, test_score)
示例#28
0
def SGDRegressor_pred(X_train, X_test, y_train_normalized, y_train_mean, y_test):
    """Fit an intercept-free ``SGDRegressor`` on normalised targets and predict.

    ``y_train_mean`` is added back to the raw model outputs to obtain the
    returned predictions.

    NOTE(review): the test score compares the raw (un-shifted) model output
    with ``y_test`` -- confirm that ``y_test`` is normalised the same way as
    ``y_train_normalized``.

    :return: tuple ``(predictions, predictions_train, score_test)``.
    """
    # Learning-rate schedules offered by SGDRegressor:
    #   constant:   eta = eta0
    #   optimal:    eta = 1.0 / (t + t0)
    #   invscaling: eta = eta0 / pow(t, power_t)
    regressor = SGDRegressor(alpha=0.0001, eta0=0.001, n_iter=150, fit_intercept=False, shuffle=True, verbose=0)
    regressor = regressor.fit(X_train, y_train_normalized)

    # Shift the outputs back by the training mean.
    train_output = regressor.predict(X_train)
    predictions_train = train_output + y_train_mean
    test_output = regressor.predict(X_test)
    predictions = test_output + y_train_mean

    score_test = regressor.score(X_test, y_test)

    return predictions, predictions_train, score_test
示例#29
0
def SGD(task, data, split=0.3, lr="optimal", alpha=0.0001, seed=42):
    """Train an SGD regressor or classifier on a random train/test split.

    :param task: ``"r"``, ``"reg"`` or ``"regression"`` for a regressor;
        ``"c"``, ``"classify"`` or ``"classification"`` for a classifier
        (case-insensitive).
    :param data: mapping with the features under ``"X"`` and the targets
        under ``"y"``.
    :param split: fraction of the data held out for testing.
    :param lr: learning-rate schedule passed to the estimator.
    :param alpha: regularisation strength passed to the estimator.
    :param seed: random state for the estimator and the split.
    :return: the fitted estimator.
    :raises NameError: when ``task`` is none of the accepted names.
    """
    task = task.lower()

    X = data["X"]
    y = data["y"]

    if task in ("r", "reg", "regression"):
        sgd = SGDRegressor(learning_rate=lr, alpha=alpha, random_state=seed)
    elif task in ("c", "classify", "classification"):
        sgd = SGDClassifier(learning_rate=lr, alpha=alpha, random_state=seed)
    else:
        raise NameError('Task should be either regression or classification')

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=split,
                                                        random_state=seed)

    sgd.fit(X_train, y_train)

    # Report the estimator's own score on both parts of the split.
    train_score = sgd.score(X_train, y_train)
    print("Boosting Training Accuracy: " + str(train_score * 100) + "%")
    test_score = sgd.score(X_test, y_test)
    print("Boosting Testing Accuracy: " + str(test_score * 100) + "%")
    return sgd
示例#30
0
def main():
    """Train an ``SGDRegressor`` on the automobile data set and print its test score.

    Rows are read from ``auto_mobile_data.csv`` and cleaned with the helpers
    in ``setup``.  Missing values (``'?'``) are replaced by per-attribute
    means, selected attributes are skipped, and features and targets are
    min-max scaled with the ranges learned on the training split.
    """
    # MinMaxScaler can reuse the training ranges on the test data.
    from sklearn.preprocessing import MinMaxScaler

    # setting up our data and removing unwanted instances
    with open('auto_mobile_data.csv') as file:
        data = setup.remove_inst(list(csv.reader(file)))

    # creating a set of attributes to skip
    skip_atr = setup.skip_attribute(data)

    # adding attributes with no correlation to skip_atr (see scatter_plots\analysis.txt)
    skip_atr.update({0, 1, 12, 18, 19, 20, 22, 25})

    # dictionary of the mean of values of attributes
    mean_nums = setup.missing_values(data)

    # x is a 2d list of the kept attributes, y is the last column of each row
    y = [float(row[-1]) for row in data]
    x = [[
        float(mean_nums[col] if value == '?' else value)
        for col, value in enumerate(row) if col not in skip_atr
    ] for row in data]

    # splitting our data into training and testing data
    train_x, train_y, test_x, test_y = train_test(x, y)

    # Learn the scaling ranges on the training data only.
    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    x = x_scaler.fit_transform(np.array(train_x))
    y = y_scaler.fit_transform(np.array(train_y).reshape(-1, 1)).ravel()

    # fitting data to our model
    sgd = SGDRegressor().fit(x, y)

    # Score on unseen data, scaled with the training ranges; scaling the test
    # set on its own would map it onto a different range than the model saw.
    test_x = x_scaler.transform(np.array(test_x))
    test_y = y_scaler.transform(np.array(test_y).reshape(-1, 1)).ravel()
    print('score of model: ', sgd.score(test_x, test_y))
示例#31
0
def consider_SGD():
    """Try several ``SGDRegressor`` configurations and return the best one.

    Every combination of three random seeds, three penalties and two losses
    is fitted on the module-level ``X_train`` / ``y_train`` and scored on
    ``X_vali`` / ``y_vali``.

    :return: the ``ExperimentResult`` with the highest validation score.
    """
    performances: List[ExperimentResult] = []
    for rnd in range(3):
        for penal in ["l1", "l2", "elasticnet"]:
            for los in ["squared_loss", "huber"]:
                params = {
                    "random_state": rnd,
                    "penalty": penal,
                    "max_iter": 100,
                    "loss": los
                }
                f = SGDRegressor(**params)
                f.fit(X_train, y_train)
                vali_acc = f.score(X_vali, y_vali)
                performances.append(ExperimentResult(vali_acc, params, f))
    # A higher score is better, so the best run is the maximum; ``min``
    # selected the worst configuration.
    return max(performances, key=lambda result: result.vali_acc)
示例#32
0
def SGD_boston():
    """Fit an SGDRegressor on the Boston housing data and print the outcome.

    The data is split 75/25, the features are standardised, and the function
    prints the score on the test split followed by the first 20 predictions
    and the first 20 true targets.

    Returns:
        None.
    """
    boston = load_boston()
    x = boston.data
    y = boston.target
    train_x, test_x, train_y, test_y = \
        train_test_split(x, y, test_size=.25)
    std_s = StandardScaler()
    train_x = std_s.fit_transform(train_x)
    # Reuse the statistics learned from the training split. Re-fitting on the
    # test split would scale it differently from the data the model saw.
    test_x = std_s.transform(test_x)

    sgd = SGDRegressor()
    sgd.fit(train_x, train_y)
    score = sgd.score(test_x, test_y)
    predict_y = sgd.predict(test_x)
    print(score)
    print(predict_y[:20])
    print(test_y[:20])

    return None
示例#33
0
文件: main.py 项目: PsychoGeek13/ml
def SDGRegressionExample():
    """Train and evaluate an SGDRegressor on the Boston housing data.

    Prints the 5-fold cross-validation scores obtained on the training
    split, their mean, and the score on the held-out test split. Features
    and targets are both standardised with statistics from the training
    split only.
    """
    import numpy as np
    from sklearn.datasets import load_boston
    from sklearn.linear_model import SGDRegressor
    from sklearn.preprocessing import StandardScaler
    try:
        from sklearn.model_selection import cross_val_score
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import cross_val_score
        from sklearn.cross_validation import train_test_split

    data = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)
    X_scaler = StandardScaler()
    y_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)
    # StandardScaler works on 2-D input: scale the targets as a single
    # column, then flatten back to the 1-D shape the regressor expects.
    y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
    y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

    regressor = SGDRegressor(loss='squared_loss')
    scores = cross_val_score(regressor, X_train, y_train, cv=5)
    # Single-argument print calls behave the same on Python 2 and 3.
    print('Cross validation r-squared scores: %s' % (scores,))
    print('Average cross validation r-squared score: %s' % np.mean(scores))
    # Only fitting is needed here; the transformed output was never used.
    regressor.fit(X_train, y_train)
    print('Test set r-squared score %s' % regressor.score(X_test, y_test))
示例#34
0
def get_sgd(X_train, X_test, y_train, y_test):
    """Search for the SGD starting point that scores best on the test split.

    Initial coefficients start at 0.01 and grow by 0.1 while they are
    <= 2.0; for each of them, initial intercepts run from 0.0 to 50.0 in
    steps of 1.0. Every pair is used to fit a fresh regressor on the training
    data, which is then scored on the test data.

    Returns:
        A tuple ``(model, coef, intercept)``: a new, unfitted SGDRegressor
        configured like the ones used in the search, followed by the initial
        coefficient and intercept of the best-scoring fit.
    """
    iteration_cap = 100000
    learning_rate = 1e-5
    tolerance = 1e-3

    def new_regressor():
        # Every candidate (and the returned model) shares this configuration.
        return SGDRegressor(random_state=15,
                            max_iter=iteration_cap,
                            eta0=learning_rate,
                            tol=tolerance,
                            n_iter_no_change=6)

    best_score = -1000
    best_coef = 0.01
    best_intercept = 0.0

    coef_guess = 0.01
    while coef_guess <= 2.0:
        intercept_guess = 0.0
        while intercept_guess <= 50.0:
            candidate = new_regressor()
            candidate.fit(X_train,
                          y_train,
                          coef_init=coef_guess,
                          intercept_init=intercept_guess)
            candidate_score = candidate.score(X_test, y_test)
            # Strict comparison: on ties the earliest pair is kept.
            if candidate_score > best_score:
                best_score = candidate_score
                best_coef = coef_guess
                best_intercept = intercept_guess
            intercept_guess += 1.0
        coef_guess += 0.1

    return new_regressor(), best_coef, best_intercept
示例#35
0
class SGDRegressionModel(RegressionModel):
	"""RegressionModel whose estimator is a scikit-learn SGDRegressor."""

	def __init__(self, train_data):
		"""Initialise the base class with train_data and create the estimator."""
		RegressionModel.__init__(self, train_data)
		self.model = SGDRegressor()

	def train(self, x=None, y=None):
		"""Fit the estimator; a None argument falls back to self.train_x / self.train_y."""
		if x is None:
			x = self.train_x
		if y is None:
			y = self.train_y
		self.model.fit(x, y)

	def predict(self, x_in):
		"""Return the estimator's predictions for x_in."""
		predictions = self.model.predict(x_in)
		return predictions

	def evaluate(self, x_in, y_out):
		"""Return the estimator's score for inputs x_in against targets y_out."""
		result = self.model.score(x_in, y_out)
		return result

	def save(self, filename):
		"""Dump the estimator to filename with joblib."""
		joblib.dump(self.model, filename)

	def load(self, filename):
		"""Replace the estimator with the one joblib loads from filename."""
		restored = joblib.load(filename)
		self.model = restored
示例#36
0
def test_both_fit_and_score_contain_sample_weight(sample_weight_passed_as):
    """Check that the sample_weight given to fit() reaches score() under autologging.

    ``sample_weight_passed_as`` selects how ``sample_weight`` is handed to
    ``fit``: ``"positional"`` or ``"keyword"``. ``SGDRegressor.score`` is
    replaced by a recording stub, the model is fitted inside an MLflow run,
    and the stub must have been called exactly once with the same
    ``sample_weight``. The logged params, metrics, tags and artifacts of the
    run are verified afterwards.
    """
    mlflow.sklearn.autolog()

    from sklearn.linear_model import SGDRegressor

    # ensure that we use an appropriate model for this test
    assert "sample_weight" in _get_arg_names(SGDRegressor.fit)
    assert "sample_weight" in _get_arg_names(SGDRegressor.score)

    # Records every call made to the patched score().
    mock_obj = mock.Mock()

    def mock_score(self, X, y, sample_weight=None):  # pylint: disable=unused-argument
        # Forward the arguments to the recorder and return a fixed score.
        mock_obj(X, y, sample_weight)
        return 0

    # The stub must be a drop-in replacement for the real method.
    assert inspect.signature(
        SGDRegressor.score) == inspect.signature(mock_score)

    # NOTE(review): the class attribute is patched here and not restored
    # anywhere in this function.
    SGDRegressor.score = mock_score
    model = SGDRegressor()
    X, y = get_iris()
    # abs() keeps the random weights non-negative.
    sample_weight = abs(np.random.randn(len(X)))

    with mlflow.start_run() as run:
        if sample_weight_passed_as == "positional":
            # The two None values fill the parameters that precede
            # sample_weight in fit()'s positional order.
            model.fit(X, y, None, None, sample_weight)
        elif sample_weight_passed_as == "keyword":
            model.fit(X, y, sample_weight=sample_weight)
        # The test never calls score() itself before this point, so this call
        # must have come from fit() (presumably via autologging's
        # training-score computation).
        mock_obj.assert_called_once_with(X, y, sample_weight)

    run_id = run.info.run_id
    params, metrics, tags, artifacts = get_run_data(run_id)
    assert params == truncate_dict(
        stringify_dict_values(model.get_params(deep=True)))
    # score is still the stub here, so the expected training score is 0.
    assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
    assert tags == get_expected_class_tags(model)
    assert MODEL_DIR in artifacts
    assert_predict_equal(load_model_by_run_id(run_id), model, X)
示例#37
0
def SGDDemo():
  """Train and evaluate an SGDRegressor on the Boston housing data.

  Prints the mean 5-fold cross-validation score on the training split and
  the score on the held-out test split. Features and targets are both
  standardised with statistics from the training split only.
  """
  import numpy as np
  from sklearn.datasets import load_boston
  from sklearn.linear_model import SGDRegressor
  from sklearn.preprocessing import StandardScaler
  try:
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import train_test_split
  except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import cross_val_score
    from sklearn.cross_validation import train_test_split

  data = load_boston()
  X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)

  X_scaler = StandardScaler()
  y_scaler = StandardScaler()
  X_train = X_scaler.fit_transform(X_train)
  X_test = X_scaler.transform(X_test)
  # StandardScaler works on 2-D input: scale the targets as a single column,
  # then flatten back to the 1-D shape the regressor expects.
  y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
  y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

  regressor = SGDRegressor(loss='squared_loss')
  scores = cross_val_score(regressor, X_train, y_train, cv=5)
  # Single-argument print calls behave the same on Python 2 and 3; the
  # doubled space reproduces the separator of the old print statement.
  print("Cross validation r-sqr  %s" % np.mean(scores))
  # Only fitting is needed here; the transformed output was never used.
  regressor.fit(X_train, y_train)
  print("TEST score : %s" % regressor.score(X_test, y_test))
    Y_scaler = StandardScaler()

    X_train = X_scaler.fit_transform(X_train)
    Y_train = Y_scaler.fit_transform(Y_train)
    X_test = X_scaler.transform(X_test)
    Y_test = Y_scaler.transform(Y_test)

    print X_train[0:5]


    print len(X_train)
    print Y_test

    clf =SGDRegressor(loss="squared_loss")
    scores = cross_val_score(clf,X_train,Y_train,cv=5)
    print scores
    print np.mean(scores)

    clf.fit_transform(X_train,Y_train)

    pred  = clf.predict(X_test)

    print  clf.score(X_test,Y_test)




    # correlation(X_train,Y_train)
    # feature_selection(X_train,Y_train)
    scatter_plot(X_train,Y_train)
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
# Load the Boston housing data and split it into train and test parts.
data = load_boston()
print(data)
x_train, x_test, y_train, y_test = train_test_split(data.data, data.target)

# Standardise features and targets with statistics from the training split.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
x_train = x_scaler.fit_transform(x_train)
x_test = x_scaler.transform(x_test)
# StandardScaler works on 2-D input: scale the targets as a single column,
# then flatten back to the 1-D shape the regressor expects.
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

regressor = SGDRegressor(loss='squared_loss')
scores = cross_val_score(regressor, x_train, y_train, cv=5)
print('Cross Validation r-squared scores:', scores)
print('Average cross validation r-squared score', np.mean(scores))
# Only fitting is needed here; the transformed output was never used.
regressor.fit(x_train, y_train)
print('Test set r-squared score', regressor.score(x_test, y_test))






示例#40
0
	X_test = X[test]
	y_train = y[train]
	y_test = y[test]

	#iris = datasets.load_iris()
	#print(iris.data)
	#X = iris.data[:,0:3]
	#y = iris.data[:,3]

	start_time = time.time()
	sgd = SGDRegressor(alpha=0.01, average=False, epsilon=0.1, eta0=0.01,
             fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
             loss='squared_loss', n_iter=1000, penalty='l2', power_t=0.25,
             random_state=None, shuffle=True, verbose=0, warm_start=False)
	sgd.fit(X_train.astype('float64'),y_train)
	elapsed_time = time.time() - start_time
	print("Time %s"%elapsed_time)
	print(sgd.coef_,sgd.intercept_)
	print("Accuracy %s"%sgd.score(X_test,y_test))

	start_time = time.time()
	psgd = ParallelSGDRegressor(alpha=0.01, average=False, epsilon=0.1, eta0=0.01,
             fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
             loss='squared_loss', n_iter=1000, penalty='l2', power_t=0.25,
             random_state=None, shuffle=True, verbose=0, warm_start=False)
	psgd.fit(X_train.astype('float64'), y_train)
	elapsed_time = time.time() - start_time
	print("Time %s" %elapsed_time)
	print(psgd.coef_,psgd.intercept_)
	print("Accuracy %s"%psgd.score(X_test,y_test))
示例#41
0
import numpy as np

from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split

# load and split data
data = load_boston()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)

# scale the features and the targets
X_scaler = StandardScaler()
y_scaler = StandardScaler()

X_train = X_scaler.fit_transform(X_train)
# The test split must be scaled with the statistics learned from the
# training split; re-fitting the scalers on it would put it on a different
# scale from the data the model was trained on.
X_test = X_scaler.transform(X_test)
# StandardScaler works on 2-D input: scale the targets as a single column,
# then flatten back to the 1-D shape the regressor expects.
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

# train
regressor = SGDRegressor(loss='squared_loss')
scores = cross_val_score(regressor, X_train, y_train, cv=5)
print('Cross validation r-squared scores: {0}'.format(scores))
print('Average cross validation r-squared score: {0}'.format(np.mean(scores)))

# Only fitting is needed here; the transformed output was never used.
regressor.fit(X_train, y_train)
print('Test set r-squared score: {0}'.format(regressor.score(X_test, y_test)))
示例#42
0
# we won't use 'casual' and 'registered' in this tutorial
df.drop(['registered', 'casual'], axis=1, inplace=True)
df.drop(['datetime'], axis=1, inplace=True)

from sklearn.cross_validation import train_test_split
# X are our features without 'count'
X = df.drop(['count'], axis=1)
# y is the target 'count'
y = df['count']
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.70, random_state=2)

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor

# Fit the scaler on the training features only and reuse it for the test
# features, so both are on the scale the model is trained with.
feature_scaler = StandardScaler()
X_train_transformed = feature_scaler.fit_transform(X_train)
X_test_transformed = feature_scaler.transform(X_test)
clf = SGDRegressor()
# train your model
clf.fit(X_train_transformed, y_train)
# print out predicted values
print(clf.predict(X_test_transformed))
# print out how well the model fits the training data
print("Model Score: ", clf.score(X_train_transformed, y_train))

# Same workflow as a pipeline: scaling and regression are fitted together.
from sklearn.pipeline import make_pipeline
clf = make_pipeline(StandardScaler(), SGDRegressor())
clf.fit(X_train, y_train)
print("Model Score: ", clf.score(X_train, y_train))
示例#43
0
    for idx in range(int(np.ceil(n_trainSamples / mini_batch))):
        x_batch = train[ind[idx * mini_batch: min((idx + 1) * mini_batch, n_trainSamples)]]
        y_batch = train_target[ind[idx * mini_batch: min((idx + 1) * mini_batch, n_trainSamples)]]

        if idx > 0:
            validationScore.append(clf.score(x_batch, y_batch))
        clf.partial_fit(x_batch, y_batch)
        if idx > 0:
            trainScore.append(clf.score(x_batch, y_batch))

    plt.plot(trainScore, label="train score")
    plt.plot(validationScore, label="validation socre")
    plt.xlabel("Mini_batch")
    plt.ylabel("Score")
    plt.legend(loc='best')
    plt.title(title)

# L2-regularised SGD regressor with alpha=0.001.
sgd_regresor = SGDRegressor(penalty='l2',alpha=0.001)
# The loop body visible above calls partial_fit on the estimator it is given,
# so this call presumably trains sgd_regresor as a side effect
# (TODO confirm: plot_learning's signature is not visible here).
plot_learning(sgd_regresor,"SGDRegressor")

# Test features: everything except the identifier, the seller and the target.
test = test_subset.drop(['EbayID','Price','SellerName'], axis=1)
# NOTE(review): fit_transform re-fits `scaler` on the test features. If
# `scaler` was already fitted on the training data, `transform` is probably
# what is wanted here - confirm against where `scaler` is created.
test = scaler.fit_transform(test)
test_target = test_subset['Price']

print("SGD regressor prediction result on testing data: %f" % sgd_regresor.score(test,test_target))

plt.show()


示例#44
0
# Standardise features and targets with statistics from the training split.
# NOTE(review): X_train / y_train / X_test / y_test are defined outside this
# snippet; StandardScaler expects 2-D input, so confirm the shape of the
# targets before relying on the y_scaler lines.
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
y_train = y_scaler.fit_transform(y_train)
X_test = X_scaler.transform(X_test)
y_test = y_scaler.transform(y_test)

regressor = SGDRegressor(loss='squared_loss')
score = cross_val_score(regressor, X_train, y_train, cv=5)
# Single-argument print calls behave the same on Python 2 and 3.
print(score)
print(np.mean(score))

# Only fitting is needed here; the transformed output was never used.
regressor.fit(X_train, y_train)

print(regressor.score(X_test, y_test))














    y.append(float(s_data[4]))
    y2.append(float(s_data[1]))
pprint.pprint("Training the supervised learning model... Fit on training data")
print("=========================================")
# Two regressors on the same features, one per target list (y and y2).
# The former try/except blocks only re-raised, so errors propagate exactly
# as before without them.
clf = SGDRegressor(loss="huber")
pprint.pprint(clf.fit(X, y))
clf2 = SGDRegressor(loss="huber")
pprint.pprint(clf2.fit(X, y2))
print("=========================================")
# Single-argument print call: same output on Python 2 and 3.
print("Model testing itself! Confidence score on the training data used to construct: %s" % clf.score(X, y))
pprint.pprint("Ready to predict")
print("=========================================")


pprint.pprint("Testing with test data...")

test_data = list()
test_diff = list()
predict_diff = list()
for index in test_indices:
    # Columns 1..4 of the selected row, converted to floats.
    tmp = data[index][1:5]
    my_tmp = [float(item) for item in tmp]
    test_data.append(my_tmp)
class Model(object):
    """Value estimator backed by an SGD regressor or by a lookup table.

    ``params['class']`` selects the backend: ``'scikit'`` uses an
    ``SGDRegressor`` fed through a ``FeatureHasher``; ``'lookup_table'`` keeps
    a dict mapping decision states to ``DecisionState`` objects.
    """

    def __init__(self, params):
        """Store the backend name from ``params['class']`` and reset all state."""
        self.model_class = params['class']
        self.model = {}
        self.feature_constructor = None
        self.all_possible_decisions = []
        # Training buffer; emptied again by clean_buffer().
        self.X = []
        self.y = []
        self.buffer = 0

    def initialize(self):
        """Create the backend objects for the configured model class."""
        if self.model_class == 'scikit':
            self.model = SGDRegressor(loss='squared_loss', alpha=0.1, n_iter=10, shuffle=True, eta0=0.0001)
            self.feature_constructor = FeatureHasher(n_features=200, dtype=np.float64, non_negative=False, input_type='dict')

        # Every other method spells the table backend 'lookup_table'; the
        # shorter 'lookup' is still accepted for backward compatibility.
        elif self.model_class in ('lookup', 'lookup_table'):
            self.model = {}

    def clean_buffer(self):
        """Empty the training buffer and reset its counter."""
        self.X = []
        self.y = []
        self.buffer = 0

    def return_design_matrix(self, all_decision_states, reward=None):
        """Turn decision states into training input for the active backend.

        For ``'lookup_table'`` the arguments are returned unchanged. For
        ``'scikit'`` each ``(information, decision_taken)`` pair becomes three
        indicator features, which are hashed into a dense array; the returned
        ``y`` repeats ``reward`` once per decision state. Any other model
        class yields ``None``.
        """
        if self.model_class == 'lookup_table':
            return all_decision_states, reward

        elif self.model_class == 'scikit':
            X, y = [], []
            for decision_state in all_decision_states:
                information, decision_taken = decision_state
                first, second = str(information[0]), str(information[1])
                # One feature per information component paired with the
                # decision, plus one for the full combination.
                tr = {}
                tr['-'.join([second, decision_taken])] = 1
                tr['-'.join([first, decision_taken])] = 1
                tr['-'.join([first, second, decision_taken])] = 1

                X.append(tr)
                y.append(reward)
            X = self.feature_constructor.transform(X).toarray()

            return X, y

    def fit(self, X, y):
        """Update the backend with the observations ``X`` and target(s) ``y``."""
        if self.model_class == 'scikit':
            # X, y = self.shuffle_data(X, y)
            self.model.partial_fit(X, y)
            print(self.model.score(X, y))

        if self.model_class == 'lookup_table':
            for decision_state in X:
                if decision_state not in self.model:
                    # First visit: create an entry for every possible decision
                    # in this state.
                    for d in self.all_possible_decisions:
                        self.model[(decision_state[0], d)] = DecisionState()

                entry = self.model[decision_state]
                entry.count += 1
                # Incremental mean: move the estimate towards y by 1/count.
                entry.value_estimate = entry.value_estimate + (1.0 / entry.count) * (
                    y - entry.value_estimate)

    def predict(self, X):
        """Return the backend's value estimate for ``X``."""
        if self.model_class == 'scikit':
            return self.model.predict(X)

        if self.model_class == 'lookup_table':
            if X not in self.model:
                # Unseen state: create an entry for every possible decision.
                for d in self.all_possible_decisions:
                    self.model[(X[0], d)] = DecisionState()
            return self.model[X].value_estimate

    @staticmethod
    def shuffle_data(a, b):
        """Return ``a`` and ``b`` reordered by one shared random permutation."""
        assert len(a) == len(b)
        p = np.random.permutation(len(a))
        return a[p], b[p]
示例#47
0
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import SGDRegressor
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split

# Load the Boston housing data and split it into train and test parts.
data = load_boston()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
# The test split must be scaled with the statistics learned from the
# training split; re-fitting the scalers on it would put it on a different
# scale from the data the model was trained on.
X_test = X_scaler.transform(X_test)
# StandardScaler works on 2-D input: scale the targets as a single column,
# then flatten back to the 1-D shape the regressor expects.
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

regressor = SGDRegressor(loss='squared_loss')
scores = cross_val_score(regressor, X_train, y_train, cv=5)
# Single-argument print calls behave the same on Python 2 and 3; the doubled
# space reproduces the separator of the old print statement.
print(X_train.shape)
print("CV  %s" % (scores,))
# NOTE(review): fit_transform on the regressor is kept because its result is
# printed; it relies on the legacy estimator-as-transformer API.
print(regressor.fit_transform(X_train, y_train).shape)
print("Test r-ss %s" % regressor.score(X_test, y_test))
示例#48
0
def fit_SGD(features_train, labels_train, features_pred):
	"""Fit an SGDRegressor and return its predictions for features_pred.

	Also prints the model's score; note that the score is computed on the
	training data, not on features_pred.
	"""
	model = SGDRegressor()
	model.fit(features_train, labels_train)
	labels_pred = model.predict(features_pred)
	# Single-argument print call: same output on Python 2 and 3 (the doubled
	# space reproduces the separator of the old print statement).
	print("SGD - coefficient of determination R^2 of the prediction:  %s" % model.score(features_train, labels_train))
	return labels_pred