Exemplo n.º 1
0
def test_residual_prediction():
    """Check that a residual-based std estimate lowers the loss.

    Two ``ParametricEstimator`` models are fitted on the 'boston' training
    split: a reference model built from a single ``LinearRegression``, and
    a second one that also receives ``std=ResidualEstimator(...)``. The
    test passes when the second model's ``linearized_log_loss`` on the
    test split is strictly smaller than the reference model's.
    """
    boston = DataManager('boston')

    reference_model = ParametricEstimator(LinearRegression())
    residual_model = ParametricEstimator(
        point=LinearRegression(),
        std=ResidualEstimator(LinearRegression()),
    )

    # Fit the reference model first, then the residual model, each on the
    # same training split, and predict the same test split.
    reference_fit = reference_model.fit(boston.X_train, boston.y_train)
    reference_pred = reference_fit.predict(boston.X_test)

    residual_fit = residual_model.fit(boston.X_train, boston.y_train)
    residual_pred = residual_fit.predict(boston.X_test)

    reference_loss = linearized_log_loss(boston.y_test, reference_pred)
    residual_loss = linearized_log_loss(boston.y_test, residual_pred)

    assert residual_loss < reference_loss
Exemplo n.º 2
0
def test_simple_model():
    """Smoke-test a ``ParametricEstimator`` built from two linear models.

    The estimator is fitted on the 'boston' training split and its point
    predictions for the test split are handed to
    ``utils.assert_close_prediction`` together with the true targets and
    ``within=0.5``.
    """
    dataset = DataManager('boston')

    estimator = ParametricEstimator(LinearRegression(), LinearRegression())
    fitted = estimator.fit(dataset.X_train, dataset.y_train)
    predictions = fitted.predict(dataset.X_test)

    utils.assert_close_prediction(
        predictions.point(),
        dataset.y_test,
        within=0.5,
    )
Exemplo n.º 3
0
def test_baseline():
    """Check the default ``ParametricEstimator`` against a constant normal.

    A ``ParametricEstimator`` constructed without arguments is fitted on
    the 'boston' training split. The test then asserts that:

    * every point prediction matches the mean of the training targets and
      every predicted std matches their standard deviation;
    * slicing the prediction object yields a result of the sliced length;
    * ``pdf``/``cdf`` of a single prediction agree with ``norm.pdf`` /
      ``norm.cdf`` evaluated with that mean and std (``norm`` is
      presumably ``scipy.stats.norm`` -- confirm against the file's
      imports).

    Floating-point results are compared with ``np.allclose`` rather than
    ``==`` so the test does not depend on both sides being computed by
    bit-identical arithmetic.
    """
    data = DataManager('boston')

    model = ParametricEstimator()
    y_pred = model.fit(data.X_train, data.y_train).predict(data.X_test)

    mu = np.mean(data.y_train)
    sigma = np.std(data.y_train)
    n_test = len(data.X_test)

    # is the dummy prediction working?
    assert np.allclose(y_pred.point(), np.ones(n_test) * mu)
    assert np.allclose(y_pred.std(), np.ones(n_test) * sigma)

    # does subsetting work?
    assert len(y_pred[1:3].point()) == 2
    assert len(y_pred[1:3].lp2()) == 2

    # pdf, cdf?
    x = np.random.randint(0, 10)
    # np.random.randint excludes the upper bound, so passing n_test (not
    # n_test - 1) is what makes every test row, including the last one,
    # eligible for sampling.
    i = np.random.randint(0, n_test)

    assert np.allclose(y_pred[i].pdf(x), norm.pdf(x, mu, sigma))
    assert np.allclose(y_pred[i].cdf(x), norm.cdf(x, mu, sigma))
Exemplo n.º 4
0
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets.base import load_boston
from sklearn.model_selection import train_test_split

from skpro.parametric import ParametricEstimator
from skpro.parametric.estimators import Constant
from skpro.metrics import log_loss

# Set up the probabilistic model: a random forest supplies the point
# estimate, the std is given by the constant expression 'std(y)', and the
# distribution shape is 'norm'.
model = ParametricEstimator(
    point=RandomForestRegressor(),
    std=Constant('std(y)'),
    shape='norm',
)

# Load the boston housing data and hold out 30% of it as the test split.
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3
)

# Fit on the training split, then predict for the held-out rows.
fitted_model = model.fit(X_train, y_train)
y_pred = fitted_model.predict(X_test)

# Evaluate the log loss; the format string below consumes two numbers, so
# `loss` is expected to be a (value, std) pair because return_std=True.
loss = log_loss(y_test, y_pred, sample=True, return_std=True)
print('Loss: %f+-%f' % loss)

# Visualise the predictions; the parent directory has to be put on the
# module search path before the local `utils` module can be imported.
import sys
sys.path.append('../')
import utils
utils.plot_performance(y_test, y_pred)