Пример #1
def compare_panorama_cubic(greenery_measure="vegetation", **kwargs):
    """ Compare/plot the segmentation results of panoramic and cubic
        images to each other. Also use linear regression to determine
        how they relate to each other.

    The "green" values obtained from the panoramic tiles are used as the
    regressor and the ones from the cubic tiles as the response. The
    regression score, the slope and the intercept are printed, and a
    scatter plot with the fitted line is drawn on the current figure.

    Arguments
    ---------
    greenery_measure:
        Passed to select_green_model(); the keyword arguments it returns
        are forwarded to both TileManager instances.
    **kwargs:
        Forwarded unchanged to both TileManager constructors.
    """

    green_kwargs = select_green_model(greenery_measure)

    # Same configuration twice; only the picture type differs.
    panorama_tiler = TileManager(cubic_pictures=False, **kwargs, **green_kwargs)
    cubic_tiler = TileManager(cubic_pictures=True, **kwargs, **green_kwargs)

    panorama_green = panorama_tiler.green_direct()
    cubic_green = cubic_tiler.green_direct()

    # The return value is not used, so this is expected to act in place.
    _remove_missing(panorama_green, cubic_green)

    # Grid on which the fitted regression line is drawn.
    x = np.arange(0, 0.8, 0.01)

    # LinearRegression expects a 2-D feature matrix, hence the reshape.
    x_pano = np.array(panorama_green["green"]).reshape(-1, 1)
    y_cubic = np.array(cubic_green["green"])
    reg = LinearRegression().fit(x_pano, y_cubic)
    print(reg.score(x_pano, y_cubic))
    print(reg.coef_[0], reg.intercept_)

    plt.scatter(panorama_green["green"], cubic_green["green"])
    plt.plot(x, reg.predict(x.reshape(-1, 1)))
    # Keep a minimal axis extent so that all-zero data still gives a
    # non-degenerate plot range.
    plt.xlim(0, max(0.001, max(panorama_green["green"])*1.1))
    plt.ylim(0, max(0.001, max(cubic_green["green"])*1.1))

    plot_greenery(panorama_green, show=False, title="panorama")
    plot_greenery(cubic_green, show=False, title="cubic")
Пример #2
def test_linear_regression_sample_weights():
    """Check weighted LinearRegression against the closed-form solution.

    For each setting of ``fit_intercept`` the estimator is fitted with
    explicit sample weights and its coefficients (and intercept, when
    fitted) are compared with the weighted least-squares solution
    ``theta = (X^T W X)^(-1) X^T W y``.
    """
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept:
                # Model the intercept as the weight of a constant column.
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)
            else:
                X_aug = X

            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
                                  X_aug.T.dot(W).dot(y))

            if intercept:
                # First entry belongs to the constant column (intercept).
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
            else:
                assert_array_almost_equal(coefs1, coefs2)
Пример #3
 def test_score(self):
     """Fit a LinearRegression on y = 2 * x data and store everything in Omega.

     The feature and target frames are stored as datasets 'datax' and
     'datay', and the locally fitted model is stored as 'mymodel'.
     """
     # synthetic data: ten points on the line y = 2 * x
     x = np.arange(10)
     y = 2 * x
     frame = pd.DataFrame({'x': x, 'y': y})
     X, Y = frame[['x']], frame[['y']]

     # configure the environment before the Omega instance is created
     os.environ['DJANGO_SETTINGS_MODULE'] = ''
     om = Omega()
     om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True

     # store features and targets
     om.datasets.put(X, 'datax')
     om.datasets.put(Y, 'datay')

     # fit locally, score on the training data, then store the model
     lr = LinearRegression()
     lr.fit(X, Y)
     scores = lr.score(X, Y)
     om.models.put(lr, 'mymodel')
Пример #4
def test_linear_regression_sample_weights():
    """Check weighted LinearRegression against the closed-form solution.

    For each setting of ``fit_intercept`` the estimator is fitted with
    explicit sample weights and its coefficients (and intercept, when
    fitted) are compared with the weighted least-squares solution
    ``theta = (X^T W X)^(-1) X^T W y``.
    """
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept:
                # Model the intercept as the weight of a constant column.
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)
            else:
                X_aug = X

            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
                                  X_aug.T.dot(W).dot(y))

            if intercept:
                # First entry belongs to the constant column (intercept).
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
            else:
                assert_array_almost_equal(coefs1, coefs2)
Пример #5
def test_linear_regression_sample_weights():
    """Compare LinearRegression coefficients fitted with and without weights.

    For each (n_samples, n_features) shape the model is first fitted with
    random sample weights, sanity-checked, then refitted without weights;
    both coefficient vectors must agree.
    """
    rng = np.random.RandomState(0)
    shapes = ((6, 5), (5, 10))

    for n_samples, n_features in shapes:
        # Draw order (y, X, weights) is kept so the random stream is unchanged.
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = rng.rand(n_samples) + 1.0

        model = LinearRegression()
        model.fit(X, y, sample_weight)
        weighted_coefs = model.coef_

        # Sanity checks on the weighted fit.
        assert_equal(model.coef_.shape, (X.shape[1], ))
        assert_greater(model.score(X, y), 0.9)
        assert_array_almost_equal(model.predict(X), y)

        # Sample weight can be implemented via a simple rescaling
        # for the square loss.
        # NOTE(review): the rescaled arrays below are computed but the refit
        # uses the unscaled X and y -- confirm whether that is intended.
        root_weight = np.sqrt(sample_weight)
        scaled_y = y * root_weight
        scaled_X = X * root_weight[:, np.newaxis]
        model.fit(X, y)
        unweighted_coefs = model.coef_

        assert_array_almost_equal(weighted_coefs, unweighted_coefs)
Пример #6
def test_linear_regression_sample_weights():
    """Compare LinearRegression coefficients fitted with and without weights.

    For each (n_samples, n_features) shape the model is first fitted with
    random sample weights, sanity-checked, then refitted without weights;
    both coefficient vectors must agree.
    """
    rng = np.random.RandomState(0)
    shapes = ((6, 5), (5, 10))

    for n_samples, n_features in shapes:
        # Draw order (y, X, weights) is kept so the random stream is unchanged.
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = rng.rand(n_samples) + 1.0

        model = LinearRegression()
        model.fit(X, y, sample_weight)
        weighted_coefs = model.coef_

        # Sanity checks on the weighted fit.
        assert_equal(model.coef_.shape, (X.shape[1], ))
        assert_greater(model.score(X, y), 0.9)
        assert_array_almost_equal(model.predict(X), y)

        # Sample weight can be implemented via a simple rescaling
        # for the square loss.
        # NOTE(review): the rescaled arrays below are computed but the refit
        # uses the unscaled X and y -- confirm whether that is intended.
        root_weight = np.sqrt(sample_weight)
        scaled_y = y * root_weight
        scaled_X = X * root_weight[:, np.newaxis]
        model.fit(X, y)
        unweighted_coefs = model.coef_

        assert_array_almost_equal(weighted_coefs, unweighted_coefs)
Пример #7
# Degree-2 polynomial regression on the Boston data set.
# NOTE(review): the original split call was cut off after `boston.data`;
# `boston.target` is required by the four-way unpacking, while the
# test_size / random_state values are a reconstruction -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data, boston.target, test_size=0.2, random_state=2)

# Expand the features with all degree-2 terms (no constant column).
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
# Re-use the transformer fitted on the training data for the test data.
X_test_poly = poly.transform(X_test)
model2 = LinearRegression(normalize=True)
model2.fit(X_train_poly, y_train)
mutilScore = model2.score(X_test_poly, y_test)

# Error on the held-out test split.
y_pred = model2.predict(X_test_poly)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# 10-fold cross-validated predictions; these use the raw features, not
# the polynomial expansion.
predicted = cross_val_predict(model2, boston.data, boston.target, cv=10)
print("MSE:", metrics.mean_squared_error(boston.target, predicted))

# Predicted vs. true targets (yellow circles) against the identity
# reference (green plus signs).
import matplotlib.pyplot as plt
plt.scatter(boston.target, predicted, color="y", marker="o")
plt.scatter(boston.target, boston.target, color="g", marker="+")
Пример #8
# Plain linear regression on the Boston data set.
boston = load_boston()

# Hold out 20% of the samples for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data, boston.target, test_size=0.2, random_state=2)

model1 = LinearRegression(normalize=True)
model1.fit(X_train, y_train)
simpleScore = model1.score(X_test, y_test)

# Predict on the held-out split; y_pred was previously used without ever
# being defined in this snippet.
y_pred = model1.predict(X_test)
print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# 10-fold cross-validated predictions over the full data set.
predicted = cross_val_predict(model1, boston.data, boston.target, cv=10)
print("MSE:", metrics.mean_squared_error(boston.target, predicted))
Пример #9
    plt.scatter( feature, target, color=test_color ) 
# Draw every training point in the training color.
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_test[0], target_test[0], color=train_color, label="train")

# Public import path; `sklearn.linear_model.base` is a private module.
from sklearn.linear_model import LinearRegression

# Fit on the training split and report the fitted line.
reg = LinearRegression()
reg.fit(feature_train, target_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(feature_test, target_test))
### draw the regression line, once it's coded
try:
    plt.plot(feature_test, reg.predict(feature_test))
except NameError:
    # Guard kept from the original: only relevant while `reg` is undefined.
    pass

# Refit on the test split and draw that second line over the training range.
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="b")
print("Slope2 %s" % reg.coef_)
print("Intercept2 %s" % reg.intercept_)
### and n_columns is the number of features
# Reshape both series into column vectors of shape (n_points, 1).
ages = numpy.reshape(numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape(numpy.array(net_worths), (len(net_worths), 1))

# `sklearn.cross_validation` was superseded by `sklearn.model_selection`;
# fall back to the old module only when the new one is unavailable.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(
    ages, net_worths, test_size=0.1, random_state=42)

### fill in a regression here!  Name the regression object reg so that
### the plotting code below works, and you can see what your regression looks like
# Public import path; `sklearn.linear_model.base` is a private module.
from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(ages_train, net_worths_train)
print("Slope %s" % reg.coef_)
print("Intercept %s" % reg.intercept_)

print("Score = ", reg.score(ages_test, net_worths_test))

try:
    plt.plot(ages, reg.predict(ages), color="blue")
except NameError:
    # Guard kept from the original: only relevant while `reg` is undefined.
    pass
plt.scatter(ages, net_worths)

### identify and remove the most outlier-y points
# labels
y = np.array(df['label'])

# This next section jumbles the rows, but keeps the relationship between
# X and y. This is so that we can train the LinearRegression model and then
# test it on different data, so that we know it is able to get the answers
# right.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.2)

# Create and train the regression model (n_jobs=-1: use all processors)
clf = LinearRegression(n_jobs=-1)
# training data
clf.fit(X_train, y_train)
# evaluate on the held-out data
accuracy = clf.score(X_test, y_test)

# predict future <forecast_col> values
forecast_set = clf.predict(X_lately)

# new column, empty for all existing rows
df['Forecast'] = np.nan

# set up dates to use on the graph
last_date = df.iloc[-1].name
last_unix = last_date.timestamp()
one_day = 86400  # seconds per day
next_unix = last_unix + one_day

# Step one day forward for every forecast value.
# NOTE(review): within this excerpt neither `i` nor `next_date` is used
# further; the loop body appears to be cut off -- confirm against the
# full script.
for i in forecast_set:
    next_date = datetime.datetime.fromtimestamp(next_unix)
    next_unix += one_day
Пример #12
# Fit a LinearRegression on data_df, wrap it in a ModelHolder, reload a model
# via load_model() and report its score and mean absolute error.

# Splitting data into test_random_forest and train
# NOTE: the split below is commented out, so the whole of data_df is used.
# train_set, test_set = train_test_split(data_df, test_size=0.01, random_state=np.random.randint(1, 1000))
# Removing all unused variable for memory management

# Separate output from inputs
# Target column, segment identifiers, and the remaining columns as features.
y_train = data_df['time_to_failure']
x_train_seg = data_df['segment_id']
x_train = data_df.drop(['time_to_failure','segment_id'], axis=1)

# y_test = test_set['time_to_failure']
# x_test_seg = test_set['segment_id']
# x_test = test_set.drop(['time_to_failure'], axis=1)
# x_test = x_test.drop(['segment_id'], axis=1)

model = LinearRegression(n_jobs=4) 
model.fit(x_train, y_train)

# NOTE(review): `mh` is built here, but this excerpt shows no call that
# stores it before load_model(model_name) -- confirm the persistence step.
mh = ModelHolder(model, most_dependent_columns)
# Drop the in-memory model and continue with the one returned by load_model().
model = None
mh_new = load_model(model_name)
model, most_dependent_columns = mh_new.get()

# NOTE: despite the messages, predictions and metrics below are computed on
# the training data (x_train / y_train).
print('Evaluating test data , transforming test data now ... ')
print('Calculating score and error .. ')
y_pred = model.predict(x_train)
print('Score', model.score(x_train, y_train))

mas = mean_absolute_error(y_train, y_pred)
print('Mean Absolute Error', mas)