def model(base, dtc_data_vd):
    """Yield a DecisionTreeClassifier wrapper around a model trained in SQL.

    Generator-style setup/teardown (presumably registered as a pytest
    fixture by a decorator outside this view -- confirm). The model
    "decision_tc_model_test" is re-created with rf_classifier on
    public.dtc_data, the Python object is populated by hand, and the model
    is dropped once the consumer hands control back.

    dtc_data_vd is not used in the body; presumably it is requested so the
    dtc_data table exists before training -- verify against its definition.
    """
    cursor = base.cursor
    cursor.execute("DROP MODEL IF EXISTS decision_tc_model_test")
    cursor.execute(
        "SELECT rf_classifier('decision_tc_model_test', 'public.dtc_data', 'TransPortation', '*' USING PARAMETERS exclude_columns='id, TransPortation', mtry=4, ntree=1, max_breadth=100, sampling_size=1, max_depth=6, nbins=40, seed=1, id_column='id')"
    )

    # The wrapper is filled in manually instead of going through load_model,
    # which the original author reported as buggy.
    clf = DecisionTreeClassifier(
        "decision_tc_model_test",
        cursor=cursor,
        max_features=4,
        max_leaf_nodes=100,
        max_depth=6,
        min_samples_leaf=1,
        min_info_gain=0,
        nbins=40,
    )
    clf.input_relation = "public.dtc_data"
    clf.test_relation = clf.input_relation
    clf.X = ["Gender", '"owned cars"', "cost", "income"]
    clf.y = "TransPortation"

    # Recover the distinct, non-null response values in sorted order.
    distinct_query = "SELECT DISTINCT {} FROM {} WHERE {} IS NOT NULL ORDER BY 1".format(
        clf.y, clf.input_relation, clf.y
    )
    cursor.execute(distinct_query)
    rows = cursor.fetchall()
    clf.classes_ = [row[0] for row in rows]

    yield clf

    # Teardown: remove the model created above.
    clf.drop()
def test_to_sql(self, model, titanic_vd):
    """Check that the to_sql() expression predicts the same class as the database.

    A classifier is fit on titanic_vd to predict "survived" from age, fare
    and sex; one literal row is scored with PREDICT_RF_CLASSIFIER and with
    the SQL produced by to_sql(), both cast to int, and the two are compared.
    """
    # NOTE(review): another test_to_sql appears later in this file; if both
    # live in the same class, the later definition shadows this one.
    sql_model = DecisionTreeClassifier("rfc_sql_test")
    # Start from a clean slate in case a model with this name already exists.
    sql_model.drop()
    sql_model.fit(titanic_vd, ["age", "fare", "sex"], "survived")

    query_template = "SELECT PREDICT_RF_CLASSIFIER(* USING PARAMETERS model_name = 'rfc_sql_test', match_by_pos=True)::int, {}::int FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex) x"
    query = query_template.format(sql_model.to_sql())
    current_cursor().execute(query)
    row = current_cursor().fetchone()

    native_class = row[0]
    generated_class = row[1]
    assert native_class == generated_class
    sql_model.drop()
def test_contour(self, base, titanic_vd):
    """Fit on two predictors and check the artist count of the contour plot.

    The classifier predicts "survived" from age and fare; the object
    returned by contour() is expected to report 34 entries from
    get_default_bbox_extra_artists().
    """
    contour_model = DecisionTreeClassifier("model_contour", cursor=base.cursor)
    # Drop first so the fit below does not collide with an existing model.
    contour_model.drop()

    predictors = ["age", "fare"]
    contour_model.fit(titanic_vd, predictors, "survived")

    plot = contour_model.contour()
    artists = plot.get_default_bbox_extra_artists()
    assert len(artists) == 34
    contour_model.drop()
def test_to_sql(self, model, titanic_vd):
    """Check that to_sql() reproduces the database's class-1 probability.

    A classifier is fit on titanic_vd to predict "survived" from age, fare
    and sex; one literal row is scored with PREDICT_RF_CLASSIFIER
    (class=1, type='probability') and with the SQL produced by to_sql().
    The two floats must agree within a relative tolerance of 1e-2.
    """
    sql_model = DecisionTreeClassifier("rfc_sql_test", cursor=model.cursor)
    # Start from a clean slate in case a model with this name already exists.
    sql_model.drop()
    sql_model.fit(titanic_vd, ["age", "fare", "sex"], "survived")

    query_template = "SELECT PREDICT_RF_CLASSIFIER(* USING PARAMETERS model_name = 'rfc_sql_test', match_by_pos=True, class=1, type='probability')::float, {}::float FROM (SELECT 30.0 AS age, 45.0 AS fare, 'male' AS sex) x"
    query = query_template.format(sql_model.to_sql())
    model.cursor.execute(query)
    row = model.cursor.fetchone()

    native_proba = row[0]
    generated_proba = row[1]
    assert native_proba == pytest.approx(generated_proba, rel=1e-2)
    sql_model.drop()
def test_to_python(self, model, titanic_vd):
    """Check that the to_python() predictor agrees with PREDICT_RF_CLASSIFIER.

    A classifier is fit on titanic_vd to predict "embarked" from age, fare
    and sex. Two sample rows are then scored both in-database and through
    the callable returned by to_python(return_str=False), and the predicted
    classes must match.
    """
    model_test = DecisionTreeClassifier("rfc_python_test", cursor=model.cursor)
    # Start from a clean slate in case a model with this name already exists.
    model_test.drop()

    # Each case pairs the in-database scoring query with the same row
    # expressed as Python values.
    cases = (
        (
            "SELECT PREDICT_RF_CLASSIFIER(30.0, 45.0, 'male' USING PARAMETERS model_name = 'rfc_python_test', match_by_pos=True)",
            [30.0, 45.0, "male"],
        ),
        (
            "SELECT PREDICT_RF_CLASSIFIER(30.0, 145.0, 'female' USING PARAMETERS model_name = 'rfc_python_test', match_by_pos=True)",
            [30.0, 145.0, "female"],
        ),
    )

    try:
        model_test.fit(titanic_vd, ["age", "fare", "sex"], "embarked")
        # Build the Python predictor once; the same callable serves every case.
        predict = model_test.to_python(return_str=False)
        for query, row in cases:
            model_test.cursor.execute(query)
            prediction = model_test.cursor.fetchone()[0]
            assert prediction == predict([row])[0]
    finally:
        # The model was previously left behind in the database; drop it even
        # when fitting or an assertion fails, as the sibling tests do on success.
        model_test.drop()
def test_model_from_vDF(self, base, dtc_data_vd):
    """Fit from dtc_data_vd and verify the model shows up in the models table.

    dtc_data_vd is passed straight to fit() as the input relation
    (presumably a vDataFrame, per the test name -- confirm).
    """
    cursor = base.cursor
    cursor.execute("DROP MODEL IF EXISTS tc_from_vDF")

    vdf_model = DecisionTreeClassifier("tc_from_vDF", cursor=cursor)
    predictors = ["Gender", '"owned cars"', "cost", "income"]
    vdf_model.fit(dtc_data_vd, predictors, "TransPortation")

    cursor.execute("SELECT model_name FROM models WHERE model_name = 'tc_from_vDF'")
    row = cursor.fetchone()
    assert row[0] == "tc_from_vDF"
    vdf_model.drop()
def test_drop(self, base):
    """Verify that drop() removes a fitted model from the models table.

    The model is fit on public.dtc_data, confirmed to be listed in
    `models`, dropped, and then confirmed to be absent.
    """
    cursor = base.cursor
    cursor.execute("DROP MODEL IF EXISTS decision_tc_model_test_drop")

    drop_model = DecisionTreeClassifier("decision_tc_model_test_drop", cursor=cursor)
    predictors = ["Gender", '"owned cars"', "cost", "income"]
    drop_model.fit("public.dtc_data", predictors, "TransPortation")

    # The same lookup is issued before and after the drop.
    lookup_query = "SELECT model_name FROM models WHERE model_name = 'decision_tc_model_test_drop'"

    cursor.execute(lookup_query)
    row_before = cursor.fetchone()
    assert row_before[0] == "decision_tc_model_test_drop"

    drop_model.drop()

    cursor.execute(lookup_query)
    row_after = cursor.fetchone()
    assert row_after is None
def test_repr(self, model):
    """Check __repr__ for the shared model and for a model that was never fit.

    The shared model's repr must contain the start of its rf_classifier
    training call; a classifier that is only constructed and dropped must
    repr as "<RandomForestClassifier>".
    """
    trained_repr = model.__repr__()
    assert "SELECT rf_classifier('public.decision_tc_model_test'," in trained_repr

    blank_model = DecisionTreeClassifier("RF_repr")
    # Make sure no stored model named RF_repr backs this object.
    blank_model.drop()
    blank_repr = blank_model.__repr__()
    assert blank_repr == "<RandomForestClassifier>"