Пример #1
0
def test_sklearn2code_export():
    """Fit a boosted Earth classifier and verify its exported code.

    The fitted model is exported for ``predict``, ``predict_proba`` and
    ``transform`` using the ``numpy_flat`` language, the generated code is
    loaded as a module, and ``assert_correct_exported_module`` compares the
    module against the model on the training frame.
    """
    exported_methods = ('predict', 'predict_proba', 'transform')

    # Seed NumPy's global RNG before the data set is generated.
    np.random.seed(0)
    features, y = make_classification(n_classes=2)
    column_names = ['x%d' % i for i in range(features.shape[1])]
    X = DataFrame(features, columns=column_names)

    loss_function = BinomialDeviance(2)
    base_learner = Earth(max_degree=2, use_fast=True, max_terms=10)
    model = Booster(base_learner, loss_function)
    model.fit(X, y)

    code = sklearn2code(model, list(exported_methods), numpy_flat)
    module = exec_module('test_module', code)
    assert_correct_exported_module(model, module, list(exported_methods),
                                   dict(X=X), X)
Пример #2
0
def test_argument_names():
    """Check that an export with explicit ``argument_names`` is callable.

    A ``GradientBoostingRegressor`` is fitted on the Boston data and exported
    with ``argument_names`` set to the frame's columns. The exported
    ``predict`` is then called with the frame unpacked as keyword arguments
    and compared with the model's own predictions.
    """
    dataset = load_boston()
    X = DataFrame(dataset['data'], columns=dataset['feature_names'])
    y = dataset['target']

    regressor = GradientBoostingRegressor(verbose=True)
    model = regressor.fit(X, y)

    code = sklearn2code(model, ['predict'], numpy_flat,
                        argument_names=X.columns)
    boston_housing_module = exec_module('boston_housing_module', code)

    expected = model.predict(X)
    # Unpacking the frame passes one keyword argument per column.
    actual = boston_housing_module.predict(**X)
    assert_array_almost_equal(expected, actual)
Пример #3
0
 def test_case(self):
     """Compare every exported method with the fitted model's own output.

     ``estimator``, ``fit_data``, ``predict_data``, ``export_predict_data``
     and ``methods`` are free variables taken from the enclosing scope
     (not visible in this block). For each method the model is exported
     with the ``numpy_flat`` language, the code is loaded as a module, and
     the exported function is called with ``export_predict_data['X']``
     unpacked as keyword arguments. Results must agree to 3 decimals.

     Any failure propagates unchanged; the previous bare ``except`` only
     re-raised, so it has been removed.
     """
     model = clone(estimator)
     model.fit(**fit_data)
     for method in methods:
         pred = getattr(model, method)(**predict_data)
         code = sklearn2code(model, method, numpy_flat)
         module = exec_module('test_module', code)
         exported_function = getattr(module, method)
         exported_pred = exported_function(**export_predict_data['X'])
         if isinstance(exported_pred, tuple):
             # A tuple result becomes a frame with one integer-labelled
             # column per tuple element so it can be compared as an array.
             exported_pred = DataFrame(dict(enumerate(exported_pred)))
         assert_array_almost_equal(pred, exported_pred, 3)
Пример #4
0
    def test_case(self):
        """Compare every exportable method with the fitted model's output.

        ``estimator``, ``fit_data``, ``predict_data``, ``export_predict_data``
        and ``methods`` are free variables taken from the enclosing scope
        (not visible in this block). Each method is exported with the
        ``pandas`` target; methods for which ``sklearn2code`` raises
        ``ExpressionTypeNotSupportedError`` are skipped. The exported
        function receives ``export_predict_data['X']`` as a single
        positional argument and must agree with the model to 3 decimals.

        On failure the generated code is copied to the clipboard as a
        debugging aid, and the original exception is then re-raised.
        """

        def copy_for_debugging(text):
            # Best effort only: the optional ``clipboard`` package may be
            # missing or unusable (e.g. on a headless machine), and that must
            # never replace the real test failure with an unrelated error.
            try:
                import clipboard
                clipboard.copy(text)
            except Exception:
                pass

        model = clone(estimator)
        model.fit(**fit_data)

        for method in methods:
            pred = DataFrame(getattr(model, method)(**predict_data))
            try:
                code = sklearn2code(model, method, pandas)
            except ExpressionTypeNotSupportedError:
                # This method cannot be expressed in the target; skip it.
                continue
            try:
                module = exec_module('test_module', code)
                exported_pred = getattr(module,
                                        method)(export_predict_data['X'])
                assert_array_almost_equal(pred, exported_pred, 3)
            except Exception:
                copy_for_debugging(code)
                raise
Пример #5
0
def test_super_learner():
    """Check SuperLearner quality and the fidelity of its exported code.

    A cross-validated ``SuperLearner`` combining a linear model and an Earth
    model is fitted on the Boston data. The test asserts that its
    cross-validated R^2 is at least 90% of the best component's
    cross-validated R^2, then exports ``predict`` with ``numpy_flat`` and
    checks that the exported function reproduces the model's predictions.
    """
    np.random.seed(0)
    X, y = load_boston(return_X_y=True)
    X = pandas.DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    model = CrossValidatingEstimator(
        SuperLearner(
            [('linear', LinearRegression()), ('earth', Earth(max_degree=2))],
            LinearRegression(),
            cv=5,
            n_jobs=1),
        cv=5)
    cv_pred = model.fit_predict(X, y)
    pred = model.predict(X)
    cv_r2 = r2_score(y, cv_pred)

    # Score every component learner. The previous code looped over
    # ``range(2)`` but always scored the first component, so the "best"
    # value was really just the first component's score.
    components = model.estimator_.cross_validating_estimators_.values()
    best_component_cv_r2 = max(
        r2_score(y, component.cv_predictions_) for component in components)
    assert cv_r2 >= .9 * best_component_cv_r2

    code = sklearn2code(model, ['predict'], numpy_flat)
    module = exec_module('module', code)
    test_pred = module.predict(**X)
    flat_pred = np.ravel(pred)
    flat_test_pred = np.ravel(test_pred)
    try:
        assert_array_almost_equal(flat_pred, flat_test_pred)
    except AssertionError:
        # Show only the entries that disagree before re-raising the failure.
        idx = np.abs(flat_pred - flat_test_pred) > .000001
        print(flat_pred[idx])
        print(flat_test_pred[idx])
        raise

    # Report the scores for manual inspection.
    print(r2_score(y, pred))
    print(cv_r2)
    print(best_component_cv_r2)
Пример #6
0

@sym_predict.register(XGBRegressor)
def sym_predict_xgb_regressor(estimator):
    """Build a symbolic ``predict`` function for a fitted ``XGBRegressor``.

    Each tree in the booster's text dump is parsed into its own ``Function``
    of the input variables and bound to a fresh output variable. The result
    is the sum of those tree outputs plus the model's global bias.

    The bias is XGBoost's ``base_score`` parameter: the dumped trees hold only
    leaf values, so the initial prediction has to be added separately. It is
    read from ``estimator.base_score`` and falls back to 0.5 (the historical
    XGBoost default) when the attribute is missing or ``None``.

    NOTE(review): this is a plain sum, so it matches ``predict`` only for
    identity-link objectives such as squared error -- confirm before using
    it with other objectives.

    Parameters:
        estimator: a fitted ``XGBRegressor``.

    Returns:
        A ``Function`` whose inputs are the booster's feature names, whose
        calls evaluate the individual trees, and whose single output is the
        summed prediction.
    """
    booster = estimator.get_booster()
    dump = booster.get_dump()
    inputs = tuple(map(RealVariable, booster.feature_names))
    Var = VariableFactory(existing=inputs)

    # One call per dumped tree: (output variables, (tree function, arguments)).
    calls = []
    for tree_text in dump:
        tree_function = Function(
            inputs, tuple(), (Node.from_str(tree_text).expression(), ))
        calls.append(((Var(), ), (tree_function, inputs)))
    calls = tuple(calls)

    base_score = getattr(estimator, 'base_score', None)
    if base_score is None:
        base_score = 0.5

    # call[0][0] is the single output variable bound to each tree.
    tree_outputs = (call[0][0] for call in calls)
    output = reduce(__add__, tree_outputs) + RealNumber(float(base_score))
    return Function(inputs, calls, (output, ))


if __name__ == '__main__':
    # Manual smoke test: fit a tiny XGBoost regressor, print its symbolic
    # form and generated code, then print exported and native predictions
    # for the same rows so they can be compared by eye.
    model = XGBRegressor(n_estimators=2, max_depth=1)
    X, y = make_regression()
    X = DataFrame(X, columns=['x%d' % i for i in range(X.shape[1])])
    model.fit(X, y)
    print(sym_predict(model))
    code = sklearn2code(model, ['predict'], numpy_flat)
    print(code)
    # get_booster() is the accessor sym_predict_xgb_regressor uses; the
    # legacy booster() method is not available on current XGBoost releases.
    print(model.get_booster().get_dump()[0])
    module = exec_module('module', code)
    # .loc slicing is label-based and inclusive of the end label.
    sample = X.loc[:10, :]
    print(module.predict(**sample))
    print(model.predict(sample))
Пример #7
0
# Example: export a fitted py-earth model to NumPy code with sklearn2code and
# check the generated module against the original model.
#
# load_boston and assert_array_almost_equal are imported from their public
# packages; the private ``sklearn.datasets.base`` and ``numpy.testing.utils``
# module paths are not available in newer releases.
# NOTE(review): load_boston itself was removed in scikit-learn 1.2, so this
# example needs an older scikit-learn or a different data set.
from sklearn.datasets import load_boston
from pyearth.earth import Earth
from pandas import DataFrame
from sklearn2code.sklearn2code import sklearn2code
from sklearn2code.languages import numpy_flat
from sklearn2code.utility import exec_module
from numpy.testing import assert_array_almost_equal
from yapf.yapflib.yapf_api import FormatCode

# Load a data set.
boston = load_boston()
X = DataFrame(boston['data'], columns=boston['feature_names'])
y = boston['target']

# Fit a py-earth model.
model = Earth(max_degree=2).fit(X, y)

# Generate code from the py-earth model.
code = sklearn2code(model, ['predict'], numpy_flat)

# Execute the generated code in its own module.
boston_housing_module = exec_module('boston_housing_module', code)

# Confirm that the generated module's predictions match the fitted model's
# predict method (to assert_array_almost_equal's default precision). The
# frame is unpacked so each column is passed as a keyword argument.
assert_array_almost_equal(model.predict(X), boston_housing_module.predict(**X))

# Print the generated code (using yapf for formatting).
print(FormatCode(code, style_config='pep8')[0])