Example 1
import pytest
from sklearn import linear_model
from m2cgen import assemblers


# The bogus link must be rejected (by fit() or by the assembler).
@pytest.mark.xfail(raises=ValueError, strict=True)
def test_sklearn_glm_unknown_link_func():
    estimator = linear_model.TweedieRegressor(
        power=1, link="this_link_func_does_not_exist", max_iter=10)
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])

    assembler = assemblers.SklearnGLMModelAssembler(estimator)
    assembler.assemble()
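
The same expectation can also be written inline; a minimal sketch, assuming the ValueError comes either from fit() (recent scikit-learn releases validate the link there) or from the assembler:

def test_sklearn_glm_unknown_link_func_inline():
    with pytest.raises(ValueError):
        estimator = linear_model.TweedieRegressor(
            power=1, link="this_link_func_does_not_exist", max_iter=10)
        estimator = estimator.fit([[1], [2]], [0.1, 0.2])
        assemblers.SklearnGLMModelAssembler(estimator).assemble()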
Example 2
import numpy
from sklearn import linear_model
from sklearn.datasets import make_regression
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import DoubleTensorType, FloatTensorType
# TARGET_OPSET and dump_data_and_model come from skl2onnx's test helpers.
from test_utils import TARGET_OPSET, dump_data_and_model


# Method of a unittest.TestCase in skl2onnx's test suite.
def test_model_tweedie_regressor(self):
    X, y = make_regression(n_features=5,
                           n_samples=100,
                           n_targets=1,
                           random_state=42,
                           n_informative=3)
    y = numpy.abs(y)
    y = y / y.max() + 1e-5
    for power in range(0, 4):
        with self.subTest(power=power):
            model = linear_model.TweedieRegressor(power=power).fit(X, y)
            model_onnx = convert_sklearn(
                model,
                "linear regression",
                [("input", FloatTensorType([None, X.shape[1]]))],
                target_opset=TARGET_OPSET)
            self.check_model(model_onnx, X.astype(numpy.float32))
            dump_data_and_model(X.astype(numpy.float32),
                                model,
                                model_onnx,
                                basename="SklearnTweedieRegressor%d-Dec4" %
                                power)
            model_onnx = convert_sklearn(
                model,
                "linear regression",
                [("input", DoubleTensorType([None, X.shape[1]]))],
                target_opset=TARGET_OPSET)
            dump_data_and_model(X.astype(numpy.float64),
                                model,
                                model_onnx,
                                basename="SklearnTweedieRegressor64%d" %
                                power)
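
As a rough sketch of what check_model and dump_data_and_model verify, the converted model can also be scored directly with onnxruntime (assuming it is installed; "input" matches the tensor name declared above, and the tolerances are illustrative):

import onnxruntime as rt

sess = rt.InferenceSession(model_onnx.SerializeToString(),
                           providers=["CPUExecutionProvider"])
onnx_pred = sess.run(None, {"input": X.astype(numpy.float32)})[0]
numpy.testing.assert_allclose(onnx_pred.ravel(), model.predict(X),
                              rtol=1e-4, atol=1e-4)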
Example 3
from sklearn import linear_model
from m2cgen import assemblers, ast
# cmp_exprs comes from m2cgen's test helpers.
from tests import utils


def test_sklearn_glm_identity_link_func():
    estimator = linear_model.TweedieRegressor(power=0,
                                              link="identity",
                                              max_iter=10)
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])

    assembler = assemblers.SklearnGLMModelAssembler(estimator)
    actual = assembler.assemble()

    # intercept + coef * x[0]; the identity link leaves it unwrapped
    expected = ast.BinNumExpr(
        ast.NumVal(0.12),
        ast.BinNumExpr(
            ast.FeatureRef(0),
            ast.NumVal(0.02),
            ast.BinNumOpType.MUL),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
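
The expected tree is just the identity-link GLM mean, intercept + coef * x. A quick hand-evaluation with the rounded values from the test above:

intercept, coef = 0.12, 0.02        # NumVal(0.12) and NumVal(0.02)
x = [2.0]                           # FeatureRef(0) reads x[0]
y_hat = intercept + coef * x[0]     # the ADD node over the MUL subtree
print(y_hat)  # 0.16 (up to float rounding)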
Example 4
from sklearn import linear_model
from m2cgen import assemblers, ast
from tests import utils


def test_sklearn_glm_log_link_func():
    estimator = linear_model.TweedieRegressor(power=1,
                                              link="log",
                                              fit_intercept=False,
                                              max_iter=10)
    estimator = estimator.fit([[1], [2]], [0.1, 0.2])

    assembler = assemblers.SklearnGLMModelAssembler(estimator)
    actual = assembler.assemble()

    # exp(intercept + coef * x[0]); intercept is 0.0 (fit_intercept=False)
    expected = ast.ExpExpr(
        ast.BinNumExpr(
            ast.NumVal(0.0),
            ast.BinNumExpr(
                ast.FeatureRef(0),
                ast.NumVal(-0.4619711397),
                ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
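
With the log link the assembled expression wraps the same linear predictor in ExpExpr, i.e. y_hat = exp(intercept + coef * x). Evaluated by hand with the coefficient from the test:

import math

coef = -0.4619711397                 # NumVal(-0.4619711397)
x = [2.0]
y_hat = math.exp(0.0 + coef * x[0])  # ExpExpr over the ADD subtree
print(y_hat)  # ≈ 0.397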
Example 5
        # (excerpt from a larger parametrized list of e2e regression cases)
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.LinearRegression()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.PoissonRegressor()),
        regression(
            linear_model.RANSACRegressor(
                base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
                random_state=RANDOM_SEED)),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TweedieRegressor(power=0.0)),
        regression(linear_model.TweedieRegressor(power=1.0)),
        regression(linear_model.TweedieRegressor(power=1.5)),
        regression(linear_model.TweedieRegressor(power=2.0)),
        regression(linear_model.TweedieRegressor(power=3.0)),

        # Statsmodels Linear Regression
        classification_binary(
            utils.StatsmodelsSklearnLikeWrapper(
                sm.GLM,
                dict(fit_constrained=dict(constraints=(
                    np.eye(utils.get_binary_classification_model_trainer().
                           X_train.shape[-1])[0], [1]))))),
        classification_binary(
            utils.StatsmodelsSklearnLikeWrapper(
                sm.GLM,
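
regression(...), classification_binary(...), RANDOM_SEED, and TREE_PARAMS above are helpers from the surrounding e2e test harness and are not shown in this excerpt. As a hypothetical stand-in for one of the TweedieRegressor cases, a plain fit/predict round trip would look like this (the real harness additionally transpiles the fitted model and compares its output against scikit-learn's):

from sklearn import datasets, linear_model

X, y = datasets.load_diabetes(return_X_y=True)   # strictly positive target
model = linear_model.TweedieRegressor(power=1.5, max_iter=1000).fit(X, y)
print(model.predict(X[:3]))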
Example 6
from sklearn import ensemble, linear_model, neighbors, svm, tree
from sklearn.ensemble import AdaBoostRegressor
from sklearn.naive_bayes import GaussianNB


def model_selector(model):
    # Ye good ol' ugly if-elif switch to choose the model
    if model == "linear":
        regr = linear_model.LinearRegression(n_jobs=-1)

    elif model == "lasso":
        regr = linear_model.Lasso(random_state=17)

    elif model in ("elasticnet", "elastic"):
        regr = linear_model.ElasticNet(random_state=17)

    elif model == "bayesian":
        regr = linear_model.BayesianRidge()

    elif model in ("decision tree regressor", "dtr"):
        regr = tree.DecisionTreeRegressor(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model in ("tweedie regressor 0", "normal distribution"):
        regr = linear_model.TweedieRegressor(power=0)

    elif model in ("tweedie regressor 1", "poisson distribution"):
        regr = linear_model.TweedieRegressor(power=1)

    elif model in ("extra trees regressor", "etr"):
        regr = ensemble.ExtraTreesRegressor(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model in ("random forest regressor", "rfr"):
        regr = ensemble.RandomForestRegressor(n_estimators=500, oob_score=True, random_state=17, n_jobs=-1)

    elif model in ("adaboost extra trees", "boost et"):
        regr = AdaBoostRegressor(ensemble.ExtraTreesRegressor(max_depth=8, min_samples_leaf=17, random_state=17),
                                 n_estimators=500, random_state=17)

    elif model in ("k neighbours", "k neighbor"):
        regr = neighbors.KNeighborsRegressor(n_jobs=-1)

    elif model in ("gradient boosting regressor", "gbr"):
        regr = ensemble.GradientBoostingRegressor(random_state=17)

    elif model == "voting":
        clf1 = linear_model.LinearRegression(n_jobs=-1)
        clf2 = ensemble.RandomForestRegressor(max_depth=8, min_samples_leaf=17, random_state=17, n_jobs=-1)
        clf3 = ensemble.GradientBoostingRegressor(random_state=17)
        regr = ensemble.VotingRegressor(estimators=[('lr', clf1), ('rf', clf2), ('gbr', clf3)], n_jobs=-1)

    elif model == "logistic":
        regr = linear_model.LogisticRegression(max_iter=250, random_state=17, n_jobs=-1)

    elif model == "gaussian":
        regr = GaussianNB()

    elif model in ("decision tree classifier", "dtc"):
        regr = tree.DecisionTreeClassifier(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model in ("extra tree classifier", "etc"):
        regr = ensemble.ExtraTreesClassifier(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model in ("random forest classifier", "rfc"):
        regr = ensemble.RandomForestClassifier(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model == "linear svc":
        regr = svm.LinearSVC(random_state=17)

    elif model in ("k neighbour classifier", "k neighbor classifier"):
        regr = neighbors.KNeighborsClassifier(n_jobs=-1, n_neighbors=2)

    elif model == "svc":
        regr = svm.SVC(kernel="rbf", probability=True, random_state=17)

    else:
        # Fail loudly instead of returning an unbound name.
        raise ValueError(f"unknown model name: {model!r}")

    return regr
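
A minimal usage sketch with synthetic data; model_selector only constructs the estimator, so fitting is left to the caller:

from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, random_state=17)
regr = model_selector("tweedie regressor 0")   # normal-distribution GLM
regr.fit(X, y)
print(regr.predict(X[:3]))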