def model(base, dtc_data_vd):
    """Yield a ``DecisionTreeClassifier`` describing a model trained in SQL.

    The model ``decision_tc_model_test`` is dropped if present and rebuilt
    with ``rf_classifier`` on ``public.dtc_data``. The Python object is then
    configured by hand (relations, predictors, response, classes) instead of
    being fitted. After the consumer resumes this generator, the model is
    dropped again.

    ``dtc_data_vd`` is not referenced in the body; presumably it is requested
    only so that ``public.dtc_data`` exists -- TODO confirm.
    """
    cursor = base.cursor
    cursor.execute("DROP MODEL IF EXISTS decision_tc_model_test")
    cursor.execute(
        "SELECT rf_classifier('decision_tc_model_test', 'public.dtc_data', 'TransPortation', '*' USING PARAMETERS exclude_columns='id, TransPortation', mtry=4, ntree=1, max_breadth=100, sampling_size=1, max_depth=6, nbins=40, seed=1, id_column='id')"
    )

    # load_model is avoided here because it is buggy, so the wrapper object
    # is assembled manually with parameters mirroring the SQL call above.
    hyper_parameters = {
        "max_features": 4,
        "max_leaf_nodes": 100,
        "max_depth": 6,
        "min_samples_leaf": 1,
        "min_info_gain": 0,
        "nbins": 40,
    }
    model_class = DecisionTreeClassifier(
        "decision_tc_model_test", cursor=cursor, **hyper_parameters
    )
    model_class.input_relation = "public.dtc_data"
    model_class.test_relation = model_class.input_relation
    model_class.X = ["Gender", '"owned cars"', "cost", "income"]
    model_class.y = "TransPortation"

    # The class labels are the distinct non-null response values, sorted.
    response = model_class.y
    relation = model_class.input_relation
    distinct_query = "SELECT DISTINCT {} FROM {} WHERE {} IS NOT NULL ORDER BY 1".format(
        response, relation, response
    )
    cursor.execute(distinct_query)
    rows = cursor.fetchall()
    model_class.classes_ = [row[0] for row in rows]

    yield model_class
    model_class.drop()
Пример #2
0
 def test_to_sql(self, model, titanic_vd):
     """Check that ``to_sql()`` agrees with ``PREDICT_RF_CLASSIFIER``.

     A model named ``rfc_sql_test`` is fitted on ``titanic_vd`` and both
     expressions are evaluated on one literal row; the two integer results
     must be equal. The model is dropped before fitting and after the check.
     """
     model_test = DecisionTreeClassifier("rfc_sql_test")
     model_test.drop()
     model_test.fit(titanic_vd, ["age", "fare", "sex"], "survived")

     # First column: in-database prediction. Second: the generated SQL.
     comparison_query = "SELECT PREDICT_RF_CLASSIFIER(* USING PARAMETERS model_name = 'rfc_sql_test', match_by_pos=True)::int, {}::int FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex) x"
     current_cursor().execute(comparison_query.format(model_test.to_sql()))
     row = current_cursor().fetchone()
     in_db_prediction = row[0]
     generated_sql_prediction = row[1]
     assert in_db_prediction == generated_sql_prediction

     model_test.drop()
 def test_contour(self, base, titanic_vd):
     """Check the number of artists on the plot returned by ``contour()``.

     A model named ``model_contour`` is fitted on ``age`` and ``fare`` to
     predict ``survived``; the object returned by ``contour()`` must report
     34 default bbox extra artists. The model is dropped before and after.
     """
     predictors = ["age", "fare"]
     model_test = DecisionTreeClassifier("model_contour", cursor=base.cursor)
     model_test.drop()
     model_test.fit(titanic_vd, predictors, "survived")

     plot = model_test.contour()
     artists = plot.get_default_bbox_extra_artists()
     assert len(artists) == 34

     model_test.drop()
 def test_to_sql(self, model, titanic_vd):
     """Check that ``to_sql()`` matches the in-database class-1 probability.

     A model named ``rfc_sql_test`` is fitted on ``titanic_vd`` using the
     cursor of the ``model`` fixture. Both expressions are evaluated on one
     literal row and compared with a relative tolerance of ``1e-2``.
     """
     cursor = model.cursor
     model_test = DecisionTreeClassifier("rfc_sql_test", cursor=cursor)
     model_test.drop()
     model_test.fit(titanic_vd, ["age", "fare", "sex"], "survived")

     # First column: in-database probability. Second: the generated SQL.
     comparison_query = "SELECT PREDICT_RF_CLASSIFIER(* USING PARAMETERS model_name = 'rfc_sql_test', match_by_pos=True, class=1, type='probability')::float, {}::float FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex) x"
     cursor.execute(comparison_query.format(model_test.to_sql()))
     row = cursor.fetchone()
     in_db_probability = row[0]
     generated_sql_probability = row[1]
     assert in_db_probability == pytest.approx(generated_sql_probability, rel=1e-2)

     model_test.drop()
 def test_to_python(self, model, titanic_vd):
     """Check that ``to_python()`` reproduces ``PREDICT_RF_CLASSIFIER``.

     A model named ``rfc_python_test`` is fitted on ``titanic_vd`` to predict
     ``embarked``. For two literal rows, the in-database prediction must equal
     the first element returned by the callable from
     ``to_python(return_str=False)``.

     The model is dropped in a ``finally`` clause so that it is removed from
     the database whether or not the assertions pass.
     """
     model_test = DecisionTreeClassifier("rfc_python_test", cursor=model.cursor)
     model_test.drop()
     try:
         model_test.fit(titanic_vd, ["age", "fare", "sex"], "embarked")
         # Build the Python predictor once, before any result set is pending
         # on the cursor, and reuse it for both rows.
         predict = model_test.to_python(return_str=False)

         model_test.cursor.execute(
             "SELECT PREDICT_RF_CLASSIFIER(30.0, 45.0, 'male' USING PARAMETERS model_name = 'rfc_python_test', match_by_pos=True)"
         )
         prediction = model_test.cursor.fetchone()[0]
         assert prediction == predict([[30.0, 45.0, 'male']])[0]

         model_test.cursor.execute(
             "SELECT PREDICT_RF_CLASSIFIER(30.0, 145.0, 'female' USING PARAMETERS model_name = 'rfc_python_test', match_by_pos=True)"
         )
         prediction = model_test.cursor.fetchone()[0]
         assert prediction == predict([[30.0, 145.0, 'female']])[0]
     finally:
         model_test.drop()
    def test_model_from_vDF(self, base, dtc_data_vd):
        """Check that fitting from ``dtc_data_vd`` registers the model.

        After ``fit``, the ``models`` relation must contain a row whose
        ``model_name`` is ``tc_from_vDF``. The model is dropped afterwards.
        """
        cursor = base.cursor
        cursor.execute("DROP MODEL IF EXISTS tc_from_vDF")

        predictors = ["Gender", '"owned cars"', "cost", "income"]
        model_test = DecisionTreeClassifier("tc_from_vDF", cursor=cursor)
        model_test.fit(dtc_data_vd, predictors, "TransPortation")

        cursor.execute(
            "SELECT model_name FROM models WHERE model_name = 'tc_from_vDF'"
        )
        registered_row = cursor.fetchone()
        assert registered_row[0] == "tc_from_vDF"

        model_test.drop()
    def test_drop(self, base):
        """Check that ``drop()`` removes the model from the ``models`` relation.

        The model ``decision_tc_model_test_drop`` is fitted on
        ``public.dtc_data`` and must be listed in ``models``; after ``drop()``
        the same lookup must return no row.
        """
        cursor = base.cursor
        lookup_query = "SELECT model_name FROM models WHERE model_name = 'decision_tc_model_test_drop'"

        cursor.execute("DROP MODEL IF EXISTS decision_tc_model_test_drop")
        model_test = DecisionTreeClassifier(
            "decision_tc_model_test_drop", cursor=cursor
        )
        predictors = ["Gender", '"owned cars"', "cost", "income"]
        model_test.fit("public.dtc_data", predictors, "TransPortation")

        # The model is listed once it has been fitted...
        cursor.execute(lookup_query)
        row_after_fit = cursor.fetchone()
        assert row_after_fit[0] == "decision_tc_model_test_drop"

        # ...and is no longer listed after it has been dropped.
        model_test.drop()
        cursor.execute(lookup_query)
        row_after_drop = cursor.fetchone()
        assert row_after_drop is None
 def test_repr(self, model):
     """Check the textual representation of trained and untrained models.

     The ``model`` fixture's representation must contain the start of the
     ``rf_classifier`` call; a freshly constructed and dropped model must be
     represented as ``<RandomForestClassifier>``.
     """
     trained_repr = repr(model)
     expected_fragment = "SELECT rf_classifier('public.decision_tc_model_test',"
     assert expected_fragment in trained_repr

     model_repr = DecisionTreeClassifier("RF_repr")
     model_repr.drop()
     untrained_repr = repr(model_repr)
     assert untrained_repr == "<RandomForestClassifier>"