示例#1
0
def model(xgbr_data_vd):
    """Yield an XGBoostRegressor bound to a model trained directly in SQL.

    The model ``xgbr_model_test`` is dropped if present, retrained through
    ``xgb_regressor`` and the Python object is then wired to it by hand.
    After the consumer is done with the yielded object, the model is dropped.
    """
    current_cursor().execute("DROP MODEL IF EXISTS xgbr_model_test")

    training_query = "SELECT xgb_regressor('xgbr_model_test', 'public.xgbr_data', 'TransPortation', '*' USING PARAMETERS exclude_columns='id, transportation', min_split_loss=0.1, max_ntree=3, learning_rate=0.2, sampling_size=1, max_depth=6, nbins=40, seed=1, id_column='id')"
    current_cursor().execute(training_query)

    # The attributes are set manually instead of going through load_model,
    # which the original author reports as buggy.
    hyper_params = {
        "max_ntree": 3,
        "min_split_loss": 0.1,
        "learning_rate": 0.2,
        "sample": 1.0,
        "max_depth": 6,
        "nbins": 40,
    }
    model_class = XGBoostRegressor("xgbr_model_test", **hyper_params)
    model_class.input_relation = "public.xgbr_data"
    model_class.test_relation = model_class.input_relation
    model_class.X = ['"Gender"', '"owned cars"', '"cost"', '"income"']
    model_class.y = '"TransPortation"'
    model_class.prior_ = model_class.get_prior()

    yield model_class
    model_class.drop()
示例#2
0
 def test_to_python(self, model, titanic_vd):
     """Compare PREDICT_XGB_REGRESSOR with the callable from ``model.to_python()``."""
     query_template = "SELECT PREDICT_XGB_REGRESSOR('Male', 0, 'Cheap', 'Low' USING PARAMETERS model_name = '{}', match_by_pos=True)::float"
     current_cursor().execute(query_template.format(model.name))
     prediction = current_cursor().fetchone()[0]
     # Same observation as in the SQL statement, scored in pure Python.
     python_score = model.to_python()([["Male", 0, "Cheap", "Low"]])[0]
     expected = float(python_score)
     assert prediction == pytest.approx(expected)
def model(rfr_data_vd):
    """Yield a RandomForestRegressor bound to a model trained directly in SQL.

    The model ``rfr_model_test`` is dropped if present, retrained through
    ``rf_regressor`` and the Python object is then wired to it by hand.
    After the consumer is done with the yielded object, the model is dropped.
    """
    current_cursor().execute("DROP MODEL IF EXISTS rfr_model_test")

    training_query = "SELECT rf_regressor('rfr_model_test', 'public.rfr_data', 'TransPortation', '*' USING PARAMETERS exclude_columns='id, transportation', mtry=4, ntree=3, max_breadth=100, sampling_size=1, max_depth=6, min_leaf_size=1, min_info_gain=0.0, nbins=40, seed=1, id_column='id')"
    current_cursor().execute(training_query)

    # The attributes are set manually instead of going through load_model,
    # which the original author reports as buggy.
    hyper_params = {
        "n_estimators": 3,
        "max_features": 4,
        "max_leaf_nodes": 100,
        "sample": 1.0,
        "max_depth": 6,
        "min_samples_leaf": 1,
        "min_info_gain": 0.0,
        "nbins": 40,
    }
    model_class = RandomForestRegressor("rfr_model_test", **hyper_params)
    model_class.input_relation = "public.rfr_data"
    model_class.test_relation = model_class.input_relation
    model_class.X = ['"Gender"', '"owned cars"', '"cost"', '"income"']
    model_class.y = '"TransPortation"'

    yield model_class
    model_class.drop()
示例#4
0
def model(winequality_vd):
    """Yield a Normalizer fitted on three ``public.winequality`` columns.

    Any previous ``norm_model_test`` model is dropped first; the fitted model
    is dropped again once the consumer is done with it.
    """
    current_cursor().execute("DROP MODEL IF EXISTS norm_model_test")
    normalizer = Normalizer("norm_model_test")
    feature_columns = ["citric_acid", "residual_sugar", "alcohol"]
    normalizer.fit("public.winequality", feature_columns)
    yield normalizer
    normalizer.drop()
示例#5
0
 def test_to_python(self, model):
     """Compare PREDICT_SVM_REGRESSOR with the callable from ``model.to_python``."""
     query_template = "SELECT PREDICT_SVM_REGRESSOR(3.0, 11.0, 93. USING PARAMETERS model_name = '{}', match_by_pos=True)"
     current_cursor().execute(query_template.format(model.name))
     prediction = current_cursor().fetchone()[0]
     # Same observation as in the SQL statement, scored in pure Python.
     predict = model.to_python(return_str=False)
     expected = predict([[3.0, 11.0, 93.0]])[0]
     assert prediction == pytest.approx(expected)
示例#6
0
 def test_to_python(self, model):
     """Compare APPLY_BISECTING_KMEANS with the callable from ``model.to_python``."""
     query_template = "SELECT APPLY_BISECTING_KMEANS(5.006, 3.418, 1.464, 0.244 USING PARAMETERS model_name = '{}', match_by_pos=True)"
     current_cursor().execute(query_template.format(model.name))
     # The whole fetched row is compared with the whole Python result.
     prediction = current_cursor().fetchone()
     predict = model.to_python(return_str=False)
     expected = predict([[5.006, 3.418, 1.464, 0.244]])
     assert prediction == pytest.approx(expected)
示例#7
0
 def test_to_python(self, model):
     """Compare APPLY_SVD with the callable from ``model.to_python``."""
     query_template = "SELECT APPLY_SVD(citric_acid, residual_sugar, alcohol USING PARAMETERS model_name = '{}', match_by_pos=True) FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, 93. AS alcohol) x"
     current_cursor().execute(query_template.format(model.name))
     # The fetched row is compared with the first row of the Python output.
     prediction = current_cursor().fetchone()
     transform = model.to_python(return_str=False)
     expected = transform([[3.0, 11.0, 93.0]])[0]
     assert prediction == pytest.approx(expected)
示例#8
0
def model(winequality_vd):
    """Yield a Ridge model fitted on ``public.winequality`` to predict ``quality``.

    Any previous ``ridge_model_test`` model is dropped first; the fitted model
    is dropped again once the consumer is done with it.
    """
    current_cursor().execute("DROP MODEL IF EXISTS ridge_model_test")
    ridge = Ridge("ridge_model_test")
    predictors = ["citric_acid", "residual_sugar", "alcohol"]
    ridge.fit("public.winequality", predictors, "quality")
    yield ridge
    ridge.drop()
示例#9
0
 def test_to_python(self, model):
     """Compare the APPLY_IFOREST anomaly score with ``model.to_python``."""
     query_template = "SELECT (APPLY_IFOREST('Male', 0, 'Cheap', 'Low', 1 USING PARAMETERS model_name = '{}', match_by_pos=True)).anomaly_score::float"
     current_cursor().execute(query_template.format(model.name))
     prediction = current_cursor().fetchone()[0]
     # Same observation as in the SQL statement, scored in pure Python.
     score = model.to_python(return_str=False)
     expected = score([["Male", 0, "Cheap", "Low", 1]])[0]
     assert prediction == pytest.approx(expected)
示例#10
0
 def test_model_from_vDF(self, market_vd):
     """Fit an MCA model from ``market_vd.cdt()`` and check it is listed in ``models``."""
     current_cursor().execute("DROP MODEL IF EXISTS mca_vDF")
     mca = MCA("mca_vDF")
     mca.fit(market_vd.cdt())
     lookup_query = "SELECT model_name FROM models WHERE model_name = 'mca_vDF'"
     current_cursor().execute(lookup_query)
     stored_name = current_cursor().fetchone()[0]
     assert stored_name == "mca_vDF"
     mca.drop()
示例#11
0
 def test_get_voronoi_plot(self, iris_vd):
     """Draw a Voronoi plot for a two-column KMeans model and count its artists."""
     current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")
     kmeans = KMeans("model_test_plot")
     kmeans.fit(iris_vd, ["SepalLengthCm", "SepalWidthCm"])
     result = kmeans.plot_voronoi(color="b")
     artists = result.gca().get_default_bbox_extra_artists()
     assert len(artists) == 21
     # Release matplotlib figures before dropping the model.
     plt.close("all")
     kmeans.drop()
示例#12
0
 def test_model_from_vDF(self, iris_vd):
     """Fit KMeans (random init) from a vDataFrame and check it is listed in ``models``."""
     current_cursor().execute("DROP MODEL IF EXISTS kmeans_vDF")
     kmeans = KMeans("kmeans_vDF", init="random")
     kmeans.fit(iris_vd, ["SepalLengthCm", "SepalWidthCm"])
     lookup_query = "SELECT model_name FROM models WHERE model_name = 'kmeans_vDF'"
     current_cursor().execute(lookup_query)
     stored_name = current_cursor().fetchone()[0]
     assert stored_name == "kmeans_vDF"
     kmeans.drop()
示例#13
0
 def test_model_from_vDF(self, titanic_vd):
     """Fit a OneHotEncoder from a vDataFrame and check it is listed in ``models``."""
     current_cursor().execute("DROP MODEL IF EXISTS ohe_vDF")
     encoder = OneHotEncoder("ohe_vDF", drop_first=False)
     encoder.fit(titanic_vd, ["pclass", "embarked"])
     lookup_query = "SELECT model_name FROM models WHERE model_name = 'ohe_vDF'"
     current_cursor().execute(lookup_query)
     stored_name = current_cursor().fetchone()[0]
     assert stored_name == "ohe_vDF"
     encoder.drop()
 def test_to_sql(self, model):
     """Check that ``model.to_sql`` matches PREDICT_LOGISTIC_REG in one query."""
     # First column: built-in prediction; second column: generated SQL.
     query_template = "SELECT PREDICT_LOGISTIC_REG(3.0, 11.0 USING PARAMETERS model_name = '{}', match_by_pos=True)::float, {}::float"
     query = query_template.format(model.name, model.to_sql([3.0, 11.0]))
     current_cursor().execute(query)
     prediction = current_cursor().fetchone()
     builtin_value = prediction[0]
     assert builtin_value == pytest.approx(prediction[1])
示例#15
0
 def test_get_plot(self, winequality_vd):
     """Plot a single-predictor XGBoostRegressor and count the drawn artists."""
     current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")
     regressor = XGBoostRegressor("model_test_plot")
     regressor.fit(winequality_vd, ["alcohol"], "quality")
     result = regressor.plot()
     artist_count = len(result.get_default_bbox_extra_artists())
     # Two artist counts are accepted by this test.
     assert artist_count in (9, 12)
     plt.close("all")
     regressor.drop()
示例#16
0
 def test_model_from_vDF(self, winequality_vd):
     """Fit an SVD model from a vDataFrame and check it is listed in ``models``."""
     current_cursor().execute("DROP MODEL IF EXISTS SVD_vDF")
     svd = SVD("SVD_vDF")
     svd.fit(winequality_vd, ["alcohol", "quality"])
     lookup_query = "SELECT model_name FROM models WHERE model_name = 'SVD_vDF'"
     current_cursor().execute(lookup_query)
     stored_name = current_cursor().fetchone()[0]
     assert stored_name == "SVD_vDF"
     svd.drop()
示例#17
0
 def test_get_plot(self, winequality_vd):
     """Plot a two-column BisectingKMeans model and count the drawn artists."""
     current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")
     clusterer = BisectingKMeans("model_test_plot")
     clusterer.fit(winequality_vd, ["alcohol", "quality"])
     result = clusterer.plot()
     artist_count = len(result.get_default_bbox_extra_artists())
     assert artist_count == 16
     plt.close("all")
     clusterer.drop()
示例#18
0
 def test_get_plot(self, winequality_vd):
     """Plot a single-predictor LinearSVR model and count the drawn artists."""
     current_cursor().execute("DROP MODEL IF EXISTS model_test_plot")
     svr = LinearSVR("model_test_plot")
     # The model is fitted on the relation name, not on the vDataFrame argument.
     svr.fit("public.winequality", ["alcohol"], "quality")
     result = svr.plot()
     artist_count = len(result.get_default_bbox_extra_artists())
     assert artist_count == 9
     plt.close("all")
     svr.drop()
示例#19
0
 def test_model_from_vDF(self, bsk_data_vd):
     """Fit BisectingKMeans from a vDataFrame and check it is listed in ``models``."""
     current_cursor().execute("DROP MODEL IF EXISTS bsk_vDF")
     clusterer = BisectingKMeans("bsk_vDF")
     clusterer.fit(bsk_data_vd, ["col1", "col2", "col3", "col4"])
     lookup_query = "SELECT model_name FROM models WHERE model_name = 'bsk_vDF'"
     current_cursor().execute(lookup_query)
     stored_name = current_cursor().fetchone()[0]
     assert stored_name == "bsk_vDF"
     clusterer.drop()
示例#20
0
    def test_model_from_vDF(self, titanic_vd):
        """Fit LinearSVC from a vDataFrame and check it is listed in ``models``."""
        current_cursor().execute("DROP MODEL IF EXISTS lsvc_from_vDF")
        classifier = LinearSVC("lsvc_from_vDF")
        classifier.fit(titanic_vd, ["age", "fare"], "survived")

        lookup_query = "SELECT model_name FROM models WHERE model_name = 'lsvc_from_vDF'"
        current_cursor().execute(lookup_query)
        stored_name = current_cursor().fetchone()[0]
        assert stored_name == "lsvc_from_vDF"

        classifier.drop()
示例#21
0
    def test_model_from_vDF(self, tr_data_vd):
        """Fit DecisionTreeRegressor from a vDataFrame and check it is listed in ``models``."""
        current_cursor().execute("DROP MODEL IF EXISTS tr_from_vDF")
        regressor = DecisionTreeRegressor("tr_from_vDF")
        regressor.fit(tr_data_vd, ["gender"], "transportation")

        lookup_query = "SELECT model_name FROM models WHERE model_name = 'tr_from_vDF'"
        current_cursor().execute(lookup_query)
        stored_name = current_cursor().fetchone()[0]
        assert stored_name == "tr_from_vDF"

        regressor.drop()
示例#22
0
    def test_model_from_vDF(self, winequality_vd):
        """Fit ElasticNet from a vDataFrame and check it is listed in ``models``."""
        current_cursor().execute("DROP MODEL IF EXISTS enet_from_vDF")
        regressor = ElasticNet("enet_from_vDF")
        regressor.fit(winequality_vd, ["alcohol"], "quality")

        lookup_query = "SELECT model_name FROM models WHERE model_name = 'enet_from_vDF'"
        current_cursor().execute(lookup_query)
        stored_name = current_cursor().fetchone()[0]
        assert stored_name == "enet_from_vDF"

        regressor.drop()
 def test_to_sql(self, model, titanic_vd):
     """Check that ``DummyTreeClassifier.to_sql`` matches PREDICT_RF_CLASSIFIER."""
     classifier = DummyTreeClassifier("rfc_sql_test")
     # Remove any leftover model with the same name before fitting.
     classifier.drop()
     classifier.fit(titanic_vd, ["age", "fare", "sex"], "survived")
     # First column: built-in prediction; second column: generated SQL.
     query_template = "SELECT PREDICT_RF_CLASSIFIER(* USING PARAMETERS model_name = 'rfc_sql_test', match_by_pos=True)::int, {}::int FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex) x"
     current_cursor().execute(query_template.format(classifier.to_sql()))
     prediction = current_cursor().fetchone()
     builtin_value = prediction[0]
     assert builtin_value == pytest.approx(prediction[1])
     classifier.drop()
    def test_model_from_vDF(self, rfr_data_vd):
        """Fit RandomForestRegressor from a vDataFrame and check it is listed in ``models``."""
        current_cursor().execute("DROP MODEL IF EXISTS rfr_from_vDF")
        regressor = RandomForestRegressor("rfr_from_vDF")
        regressor.fit(rfr_data_vd, ["gender"], "transportation")

        lookup_query = "SELECT model_name FROM models WHERE model_name = 'rfr_from_vDF'"
        current_cursor().execute(lookup_query)
        stored_name = current_cursor().fetchone()[0]
        assert stored_name == "rfr_from_vDF"

        regressor.drop()
示例#25
0
 def test_to_python(self, model):
     """Compare APPLY_NORMALIZE with ``to_python`` for three Normalizer setups.

     The supplied ``model`` is checked first, then two temporary models
     (``minmax`` and ``robust_zscore``) are fitted, checked and dropped.
     """
     query_template = "SELECT APPLY_NORMALIZE(citric_acid, residual_sugar, alcohol USING PARAMETERS model_name = '{}', match_by_pos=True) FROM (SELECT 3.0 AS citric_acid, 11.0 AS residual_sugar, 93. AS alcohol) x"

     # Zscore
     current_cursor().execute(query_template.format(model.name))
     prediction = current_cursor().fetchone()[0]
     expected = model.to_python(return_str=False)([[3.0, 11.0, 93.0]])[0][0]
     assert prediction == pytest.approx(expected)

     # Minmax, then Robust Zscore: both reuse the same temporary model name.
     for method in ("minmax", "robust_zscore"):
         temp_model = Normalizer("norm_model_test2", method=method)
         temp_model.drop()
         temp_model.fit("public.winequality",
                        ["citric_acid", "residual_sugar", "alcohol"])
         current_cursor().execute(query_template.format(temp_model.name))
         prediction = current_cursor().fetchone()[0]
         transform = temp_model.to_python(return_str=False)
         expected = transform([[3.0, 11.0, 93.0]])[0][0]
         assert prediction == pytest.approx(expected)
         temp_model.drop()
示例#26
0
 def test_create_verticapy_schema(self):
     """Recreate the ``verticapy`` schema and check the tables it contains."""
     drop("verticapy", method="schema")
     create_verticapy_schema()
     tables_query = """SELECT 
                 table_name 
            FROM columns 
            WHERE table_schema = 'verticapy' 
            GROUP BY 1 ORDER BY 1;"""
     current_cursor().execute(tables_query)
     table_names = []
     for row in current_cursor().fetchall():
         table_names.append(row[0])
     assert table_names == ["attr", "models"]
     drop("verticapy", method="schema")
示例#27
0
 def test_to_sql(self, model, titanic_vd):
     """Check that ``NaiveBayes.to_sql`` matches PREDICT_NAIVE_BAYES."""
     classifier = NaiveBayes("rfc_sql_test")
     # Remove any leftover model with the same name before fitting.
     classifier.drop()
     predictors = ["age", "fare", "sex", "pclass"]
     classifier.fit(titanic_vd, predictors, "survived")
     # First column: built-in prediction; second column: generated SQL.
     query_template = "SELECT PREDICT_NAIVE_BAYES(* USING PARAMETERS model_name = 'rfc_sql_test', match_by_pos=True)::int, {}::int FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex, 1 AS pclass) x"
     current_cursor().execute(query_template.format(classifier.to_sql()))
     prediction = current_cursor().fetchone()
     builtin_value = prediction[0]
     assert builtin_value == pytest.approx(prediction[1], 1e-3)
     classifier.drop()
示例#28
0
    def test_model_from_vDF(self, winequality_vd):
        """Fit LinearRegression from a vDataFrame and check it is listed in ``models``."""
        current_cursor().execute("DROP MODEL IF EXISTS linreg_from_vDF")
        regressor = LinearRegression("linreg_from_vDF")
        regressor.fit(winequality_vd, ["alcohol"], "quality")

        lookup_query = "SELECT model_name FROM models WHERE model_name = 'linreg_from_vDF'"
        current_cursor().execute(lookup_query)
        stored_name = current_cursor().fetchone()[0]
        assert stored_name == "linreg_from_vDF"

        regressor.drop()
示例#29
0
    def test_model_from_vDF(self, iforest_data_vd):
        """Fit IsolationForest from a vDataFrame and check it is listed in ``models``."""
        current_cursor().execute("DROP MODEL IF EXISTS iForest_from_vdf")
        forest = IsolationForest("iForest_from_vdf")
        forest.fit(iforest_data_vd, ["gender"])

        lookup_query = "SELECT model_name FROM models WHERE model_name = 'iForest_from_vdf'"
        current_cursor().execute(lookup_query)
        stored_name = current_cursor().fetchone()[0]
        assert stored_name == "iForest_from_vdf"

        forest.drop()
示例#30
0
 def test_does_model_exist(self, titanic_vd):
     """Check ``does_model_exist`` before and after fitting a model.

     A LinearRegression named ``load_model_test.model_test`` is dropped,
     verified as absent, fitted, and then verified as present with the
     reported type ``linear_regression``.
     """
     current_cursor().execute("CREATE SCHEMA IF NOT EXISTS load_model_test")
     model = LinearRegression("load_model_test.model_test")
     try:
         # Start from a clean state: the model must not exist yet.
         model.drop()
         assert not does_model_exist("load_model_test.model_test")
         model.fit(titanic_vd, ["age", "fare"], "survived")
         assert does_model_exist("load_model_test.model_test")
         model_type = does_model_exist(
             "load_model_test.model_test", return_model_type=True)
         assert model_type.lower() == "linear_regression"
     finally:
         # Clean up even when an assertion fails so that no model or schema
         # is left behind in the database.
         model.drop()
         current_cursor().execute("DROP SCHEMA load_model_test CASCADE")