def test_predict_classifier_wo_sigmoid(self):
    """Check what ``predict_classifier`` returns when called with ``sigmoid=False``.

    The call under test is
    ``predict_classifier("target_tbl", "id", "model_tbl", sigmoid=False)``.
    The test asserts that the first returned value equals the expected SQL
    text below, in which the aggregated ``sum(m1.weight * t1.value)`` is
    selected directly under the alias ``total_weight``, and that the second
    returned value is the string ``"total_weight"``.
    """
    # Expected query text; the header comment embeds the installed
    # molehill version through the f-string placeholder.
    # NOTE(review): in this view the literal sits on one physical line with
    # single spaces between SQL tokens, and the backslash right after the
    # opening quotes is followed by a space rather than a line break. This
    # looks like whitespace lost in extraction -- confirm the literal's real
    # line breaks against the repository before relying on this text, since
    # the assertion below is an exact string comparison.
    ret_sql = f"""\ -- client: molehill/{molehill.__version__} with features_exploded as ( select id , extract_feature(fv) as feature , extract_weight(fv) as value from target_tbl t1 LATERAL VIEW explode(features) t2 as fv ) -- DIGDAG_INSERT_LINE select t1.id , sum(m1.weight * t1.value) as total_weight from features_exploded t1 left outer join model_tbl m1 on (t1.feature = m1.feature) group by t1.id ; """
    # Positional arguments are passed as target table, id column and model
    # table (presumably in that order -- verify against the signature of
    # predict_classifier, which is defined outside this view).
    pred_sql, pred_col = predict_classifier("target_tbl", "id", "model_tbl", sigmoid=False)
    # Exact, whitespace-sensitive comparison of the generated SQL.
    assert pred_sql == ret_sql
    # The returned column name must match the alias used in the SQL above.
    assert pred_col == "total_weight"
def test_predict_classifier_bias_hashing(self):
    """Check what ``predict_classifier`` returns with ``bias=True, hashing=True``.

    The call under test is
    ``predict_classifier("target_tbl", "id", "model_tbl", bias=True, hashing=True)``.
    The test asserts that the first returned value equals the expected SQL
    text below, in which the exploded expression is
    ``add_bias(feature_hashing(features))`` and the aggregated sum is wrapped
    in ``sigmoid(...)`` under the alias ``probability``, and that the second
    returned value is the string ``"probability"``.
    """
    # Expected query text; the header comment embeds the installed
    # molehill version through the f-string placeholder.
    # NOTE(review): in this view the literal sits on one physical line with
    # single spaces between SQL tokens, and the backslash right after the
    # opening quotes is followed by a space rather than a line break. This
    # looks like whitespace lost in extraction -- confirm the literal's real
    # line breaks against the repository before relying on this text, since
    # the assertion below is an exact string comparison.
    ret_sql = f"""\ -- client: molehill/{molehill.__version__} with features_exploded as ( select id , extract_feature(fv) as feature , extract_weight(fv) as value from target_tbl t1 LATERAL VIEW explode(add_bias(feature_hashing(features))) t2 as fv ) -- DIGDAG_INSERT_LINE select t1.id , sigmoid(sum(m1.weight * t1.value)) as probability from features_exploded t1 left outer join model_tbl m1 on (t1.feature = m1.feature) group by t1.id ; """
    # No ``sigmoid`` argument is passed here, yet the expected SQL applies
    # sigmoid(...) -- presumably the function's default; verify against the
    # signature of predict_classifier, which is defined outside this view.
    pred_sql, pred_col = predict_classifier("target_tbl", "id", "model_tbl", bias=True, hashing=True)
    # Exact, whitespace-sensitive comparison of the generated SQL.
    assert pred_sql == ret_sql
    # The returned column name must match the alias used in the SQL above.
    assert pred_col == "probability"