Exemplo n.º 1
0
 def setUp(self):
     """Prepare the dataset, a base tree and a boosting model for each test.

     The boosting model receives its own freshly constructed tree (built
     with the same settings as ``self.base_est``) rather than sharing the
     ``self.base_est`` instance.
     """
     tree_settings = dict(max_depth=2, min_split=4)
     self.task = load_boston()
     self.base_est = DecisionTreeRegressor(**tree_settings)
     self.boosting = FunctionalGradientBoosting(
         base_estimator=DecisionTreeRegressor(**tree_settings),
         n_estimators=5)
Exemplo n.º 2
0
def test_argument_names():
    """Check generated code that takes the frame's column names as arguments.

    A gradient boosting regressor is fitted on the Boston housing data, its
    ``predict`` method is exported with ``sklearn2code`` using the frame's
    columns as ``argument_names``, and the generated module's predictions
    are compared with the model's own.
    """
    dataset = load_boston()
    X = DataFrame(dataset['data'], columns=dataset['feature_names'])
    y = dataset['target']
    model = GradientBoostingRegressor(verbose=True).fit(X, y)

    source = sklearn2code(model, ['predict'], numpy_flat,
                          argument_names=X.columns)
    generated = exec_module('boston_housing_module', source)

    expected = model.predict(X)
    # Unpacking the frame passes every column as a keyword argument.
    actual = generated.predict(**X)
    assert_array_almost_equal(expected, actual)
Exemplo n.º 3
0
def test_with_response_transformation():
    """Check an Earth model fitted through a log response transformation.

    The model's predictions must score well against the log of the response
    and poorly against the untransformed response.
    """
    features, target = load_boston(return_X_y=True)

    # Take the log of the raw response before it is wrapped in a frame.
    log_target = np.log(target)

    column_names = ['x%d' % i for i in range(features.shape[1])]
    features = pandas.DataFrame(features, columns=column_names)
    target = pandas.DataFrame(target, columns=['y'])

    response_transform = VariableTransformer(dict(y=Log(Identity('y'))))
    model = ResponseTransformingEstimator(Earth(), response_transform)
    model.fit(features, target)
    predictions = model.predict(features)

    assert r2_score(log_target, predictions) > .8
    assert r2_score(target, predictions) < .1
Exemplo n.º 4
0
def test_super_learner():
    """Check a cross-validated SuperLearner and its generated predict code.

    The test fits a ``SuperLearner`` (linear regression and a degree-2 Earth
    model, combined by a linear regression) wrapped in a
    ``CrossValidatingEstimator`` on the Boston housing data, then verifies:

    * the cross-validated R^2 of the ensemble is at least 90% of the best
      cross-validated R^2 among its component estimators;
    * the module generated by ``sklearn2code`` reproduces ``model.predict``.

    Raises:
        AssertionError: if either check fails. When the generated
            predictions differ, the mismatching values are printed first.
    """
    np.random.seed(0)
    X, y = load_boston(return_X_y=True)
    X = pandas.DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    model = CrossValidatingEstimator(
        SuperLearner(
            [('linear', LinearRegression()), ('earth', Earth(max_degree=2))],
            LinearRegression(),
            cv=5,
            n_jobs=1),
        cv=5)
    cv_pred = model.fit_predict(X, y)
    pred = model.predict(X)
    cv_r2 = r2_score(y, cv_pred)

    # Score every component estimator. The previous version looped over
    # range(2) without using the index, so it scored the first component
    # twice and never looked at the second one.
    components = model.estimator_.cross_validating_estimators_.values()
    best_component_cv_r2 = max(
        r2_score(y, component.cv_predictions_) for component in components)
    assert cv_r2 >= .9 * best_component_cv_r2

    code = sklearn2code(model, ['predict'], numpy_flat)
    module = exec_module('module', code)
    test_pred = module.predict(**X)
    try:
        assert_array_almost_equal(np.ravel(pred), np.ravel(test_pred))
    except AssertionError:
        # Show the entries that disagree before re-raising the failure.
        idx = np.abs(np.ravel(pred) - np.ravel(test_pred)) > .000001
        print(np.ravel(pred)[idx])
        print(np.ravel(test_pred)[idx])
        raise
    print(r2_score(y, pred))
    print(cv_r2)

    print(best_component_cv_r2)
Exemplo n.º 5
0
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets.base import load_boston
from sklearn.model_selection import train_test_split

from skpro.parametric import ParametricEstimator
from skpro.parametric.estimators import Constant
from skpro.metrics import log_loss

# Define the parametric model: a random forest is passed as the point
# estimator, Constant('std(y)') as the std estimator, and shape='norm'
# names the distribution (presumably a normal - confirm in the skpro docs).
model = ParametricEstimator(point=RandomForestRegressor(),
                            std=Constant('std(y)'),
                            shape='norm')

# Train and predict on boston housing data
# (test_size=0.3 holds out 30% of the samples; the split is not seeded).
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
y_pred = model.fit(X_train, y_train).predict(X_test)

# Obtain the loss
# NOTE(review): the print below fills two %f fields from `loss`, so
# log_loss must return a pair here (presumably mean and std - confirm).
loss = log_loss(y_test, y_pred, sample=True, return_std=True)
print('Loss: %f+-%f' % loss)

# Plot the performance
# '../' is appended to sys.path so that `utils` can be imported from there.
import sys
sys.path.append('../')
import utils
utils.plot_performance(y_test, y_pred)
Exemplo n.º 6
0
def create_boston_housing():
    """Build three keyword-argument dicts from the Boston housing data.

    The features are wrapped in a DataFrame with columns ``x0``, ``x1``, ...

    Returns:
        A 3-tuple of dicts: the first holds both ``X`` and ``y``, the second
        and third hold only ``X``.
    """
    features, target = load_boston(return_X_y=True)
    column_names = ['x%d' % idx for idx in range(features.shape[1])]
    frame = DataFrame(features, columns=column_names)
    with_target = dict(X=frame, y=target)
    return (with_target, dict(X=frame), dict(X=frame))
Exemplo n.º 7
0
#https://alan-turing-institute.github.io/skpro/introduction.html
import sklearn
import skpro

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot

from sklearn.datasets.base import load_boston
from sklearn.model_selection import train_test_split

from skpro.baselines import DensityBaseline
from skpro.metrics import log_loss

# Load boston housing data
X, y = load_boston(return_X_y=True) # X 506x13, y 506
# Hold out 30% of the samples for testing (the split is not seeded).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# X_train 354x13, X_test 152 x 13

# Train and predict on boston housing data using a baseline model
y_pred = DensityBaseline().fit(X_train, y_train)\
                          .predict(X_test)
# Obtain the loss
# NOTE(review): the print below fills two %f fields from `loss`, so
# log_loss must return a pair here (presumably mean and std - confirm).
loss = log_loss(y_test, y_pred, sample=True, return_std=True)

print('Loss: %f+-%f' % loss)



def plot_performance(y_test, y_pred, filename=None):
    """
Exemplo n.º 8
0
from sklearn.datasets.base import load_boston
from sklearn.linear_model.base import LinearRegression

# Load the Boston housing data and pull out the 'data' and 'target' entries.
boston_data = load_boston()
x = boston_data['data']
y = boston_data['target']

# Fit a linear regression on the full data set.
model = LinearRegression()
model.fit(x, y)
# A single hand-written sample: one row of 13 feature values, wrapped in an
# outer list so that predict receives a 2-D input.
sample_house = [[
    2.29690000e-01, 0.00000000e+00, 1.05900000e+01, 0.00000000e+00,
    4.89000000e-01, 6.32600000e+00, 5.25000000e+01, 4.35490000e+00,
    4.00000000e+00, 2.77000000e+02, 1.86000000e+01, 3.94870000e+02,
    1.09700000e+01
]]

# Predict the target for the sample and print the result.
prediction = model.predict(sample_house)

print(prediction)
Exemplo n.º 9
0
from sklearn.datasets.base import load_boston
from pyearth.earth import Earth
from pandas import DataFrame
from sklearn2code.sklearn2code import sklearn2code
from sklearn2code.languages import numpy_flat
from sklearn2code.utility import exec_module
from numpy.testing.utils import assert_array_almost_equal
from yapf.yapflib.yapf_api import FormatCode

# Load a data set and label the feature columns with the data set's own
# feature names.
boston = load_boston()
X = DataFrame(boston['data'], columns=boston['feature_names'])
y = boston['target']

# Fit a py-earth model (max_degree=2).
model = Earth(max_degree=2).fit(X, y)

# Generate code for the model's predict method, using the numpy_flat
# language target.
code = sklearn2code(model, ['predict'], numpy_flat)

# Execute the generated code in its own module.
boston_housing_module = exec_module('boston_housing_module', code)

# Confirm that the generated module produces output almost equal
# to the fitted model's predict method. Unpacking the frame with ** passes
# each column to the generated function as a keyword argument.
assert_array_almost_equal(model.predict(X), boston_housing_module.predict(**X))

# Print the generated code (using yapf for formatting).
# FormatCode's result is indexed with [0] to get the formatted source.
print(FormatCode(code, style_config='pep8')[0])