def test_regression_saved_without_feature_names():
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     max_depth=1, base_score=base_score)
    utils.train_model_regression(estimator)

    with utils.tmp_dir() as tmp_dirpath:
        filename = os.path.join(tmp_dirpath, "tmp.file")
        estimator.save_model(filename)
        estimator = xgboost.XGBRegressor(base_score=base_score)
        estimator.load_model(filename)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(12),
                            ast.NumVal(9.72500038),
                            ast.CompOpType.GTE),
                        ast.NumVal(1.6614188),
                        ast.NumVal(2.91697121))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(5),
                        ast.NumVal(6.94099998),
                        ast.CompOpType.GTE),
                    ast.NumVal(3.33810854),
                    ast.NumVal(1.71813202))),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)

def test_multi_class():
    estimator = ensemble.RandomForestClassifier(n_estimators=2,
                                                random_state=13)
    estimator.fit([[1], [2], [3]], [1, -1, 1])

    assembler = assemblers.RandomForestModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.BinVectorExpr(
        ast.BinVectorNumExpr(
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(0),
                        ast.NumVal(1.5),
                        ast.CompOpType.LTE),
                    ast.VectorVal([ast.NumVal(0.0), ast.NumVal(1.0)]),
                    ast.VectorVal([ast.NumVal(1.0), ast.NumVal(0.0)]))),
            ast.NumVal(0.5),
            ast.BinNumOpType.MUL),
        ast.BinVectorNumExpr(
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(0),
                        ast.NumVal(2.5),
                        ast.CompOpType.LTE),
                    ast.VectorVal([ast.NumVal(1.0), ast.NumVal(0.0)]),
                    ast.VectorVal([ast.NumVal(0.0), ast.NumVal(1.0)]))),
            ast.NumVal(0.5),
            ast.BinNumOpType.MUL),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)

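# A minimal numeric sketch (illustration only, not part of the test suite) of
# the vote averaging the expected AST above encodes: each of the two trees
# emits a one-hot class vector, and the vectors are scaled by 1 / n_trees
# (0.5 here) and summed elementwise.
votes_tree1 = [0.0, 1.0]
votes_tree2 = [1.0, 0.0]
averaged = [a * 0.5 + b * 0.5 for a, b in zip(votes_tree1, votes_tree2)]
assert averaged == [0.5, 0.5]
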
def test_regression_random_forest():
    estimator = lightgbm.LGBMRegressor(boosting_type="rf", n_estimators=2,
                                       random_state=1, max_depth=1,
                                       subsample=0.7, subsample_freq=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.IfExpr(
                            ast.CompExpr(
                                ast.FeatureRef(5),
                                ast.NumVal(6.954000000000001),
                                ast.CompOpType.GT),
                            ast.NumVal(37.24347877367631),
                            ast.NumVal(19.936999995530854))),
                    ast.BinNumOpType.ADD),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(5),
                            ast.NumVal(6.971500000000001),
                            ast.CompOpType.GT),
                        ast.NumVal(38.48600037864964),
                        ast.NumVal(20.183783757300255))),
                ast.BinNumOpType.ADD),
            ast.NumVal(0.5),
            ast.BinNumOpType.MUL))

    assert utils.cmp_exprs(actual, expected)

def test_regression():
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(5),
                            ast.NumVal(6.918),
                            ast.CompOpType.GT),
                        ast.NumVal(24.011454621684155),
                        ast.NumVal(22.289277544391084))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(12),
                        ast.NumVal(9.63),
                        ast.CompOpType.GT),
                    ast.NumVal(-0.49461212269771115),
                    ast.NumVal(0.7174324413014594))),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)

def test_regression_random_forest():
    base_score = 0.6
    estimator = xgboost.XGBRFRegressor(n_estimators=2, random_state=1,
                                       max_depth=1, base_score=base_score)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0.6),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(5),
                            ast.NumVal(6.8375001),
                            ast.CompOpType.GTE),
                        ast.NumVal(17.3671646),
                        ast.NumVal(9.48354053))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(12),
                        ast.NumVal(9.72500038),
                        ast.CompOpType.GTE),
                    ast.NumVal(8.31587982),
                    ast.NumVal(14.7766275))),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)

def test_multi_class():
    estimator = xgboost.XGBClassifier(n_estimators=1, random_state=1,
                                      max_depth=1)
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    exponent = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.SubroutineExpr(ast.NumVal(0.0)),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    exponent_sum = ast.BinNumExpr(
        ast.BinNumExpr(exponent, exponent, ast.BinNumOpType.ADD),
        exponent,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)

    expected = ast.VectorVal([softmax] * 3)

    assert utils.cmp_exprs(actual, expected)

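# A minimal numeric sketch (illustration only, not part of the test suite) of
# the softmax the expected AST above encodes: each class margin here is
# base_score + 0.0 = 0.5, so every class probability comes out as
# exp(0.5) / (3 * exp(0.5)) == 1/3.
import math

margins = [0.5, 0.5, 0.5]  # hypothetical per-class raw scores
exponents = [math.exp(m) for m in margins]
exponent_sum = sum(exponents)
probabilities = [e / exponent_sum for e in exponents]
assert all(abs(p - 1 / 3) < 1e-12 for p in probabilities)
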
def test_regression():
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     max_depth=1, base_score=base_score)
    utils.train_model_regression(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(12),
                            ast.NumVal(9.72500038),
                            ast.CompOpType.GTE),
                        ast.NumVal(1.6614188),
                        ast.NumVal(2.91697121))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(5),
                        ast.NumVal(6.94099998),
                        ast.CompOpType.GTE),
                    ast.NumVal(3.33810854),
                    ast.NumVal(1.71813202))),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)

def _split_into_subroutines(self, trees_ast, trees_num_leaves):
    result = []
    subroutine_trees = []
    subroutine_sum_leaves = 0

    for tree, num_leaves in zip(trees_ast, trees_num_leaves):
        next_sum = subroutine_sum_leaves + num_leaves
        if subroutine_trees and next_sum > self._leaves_cutoff_threshold:
            # Exceeded the max leaves in the current subroutine,
            # finalize this one and start a new one.
            partial_result = utils.apply_op_to_expressions(
                ast.BinNumOpType.ADD,
                *subroutine_trees)
            result.append(ast.SubroutineExpr(partial_result))
            subroutine_trees = []
            subroutine_sum_leaves = 0

        subroutine_sum_leaves += num_leaves
        subroutine_trees.append(tree)

    if subroutine_trees:
        partial_result = utils.apply_op_to_expressions(
            ast.BinNumOpType.ADD,
            *subroutine_trees)
        result.append(ast.SubroutineExpr(partial_result))

    return result

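# A minimal, self-contained sketch (illustration only, not library code) of
# the greedy grouping strategy used above: trees are packed into chunks whose
# total leaf count stays at or under a cutoff, so no single generated
# subroutine grows past language limits. Names here are hypothetical.
def split_by_leaf_budget(num_leaves_per_tree, cutoff):
    chunks, current, current_sum = [], [], 0
    for idx, num_leaves in enumerate(num_leaves_per_tree):
        # Close the current chunk before it would exceed the budget.
        if current and current_sum + num_leaves > cutoff:
            chunks.append(current)
            current, current_sum = [], 0
        current.append(idx)
        current_sum += num_leaves
    if current:
        chunks.append(current)
    return chunks

# Trees with 3, 4, and 2 leaves against a budget of 6 leaves per chunk:
assert split_by_leaf_budget([3, 4, 2], 6) == [[0], [1, 2]]
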
def test_multi_class_best_ntree_limit():
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100, random_state=1,
                                      max_depth=1, base_score=base_score)
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    estimator_exp_class1 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(2),
                        ast.NumVal(2.5999999),
                        ast.CompOpType.GTE),
                    ast.NumVal(-0.0731707439),
                    ast.NumVal(0.142857149)),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class2 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(2),
                        ast.NumVal(2.5999999),
                        ast.CompOpType.GTE),
                    ast.NumVal(0.0341463387),
                    ast.NumVal(-0.0714285821)),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class3 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(2),
                        ast.NumVal(4.85000038),
                        ast.CompOpType.GTE),
                    ast.NumVal(0.129441619),
                    ast.NumVal(-0.0681440532)),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(estimator_exp_class1, estimator_exp_class2,
                       ast.BinNumOpType.ADD),
        estimator_exp_class3,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)])

    assert utils.cmp_exprs(actual, expected)

def test_multi_class():
    estimator = linear_model.LogisticRegression()
    estimator.coef_ = np.array([[1, 2], [3, 4], [5, 6]])
    estimator.intercept_ = np.array([7, 8, 9])

    assembler = assemblers.LinearModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.VectorVal([
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(7),
                    ast.BinNumExpr(
                        ast.FeatureRef(0),
                        ast.NumVal(1),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(
                    ast.FeatureRef(1),
                    ast.NumVal(2),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD)),
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(8),
                    ast.BinNumExpr(
                        ast.FeatureRef(0),
                        ast.NumVal(3),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(
                    ast.FeatureRef(1),
                    ast.NumVal(4),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD)),
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(9),
                    ast.BinNumExpr(
                        ast.FeatureRef(0),
                        ast.NumVal(5),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(
                    ast.FeatureRef(1),
                    ast.NumVal(6),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD))])

    assert utils.cmp_exprs(actual, expected)

def test_leaves_cutoff_threshold():
    estimator = lightgbm.LGBMClassifier(n_estimators=2, random_state=1,
                                        max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator,
                                                  leaves_cutoff_threshold=1)
    actual = assembler.assemble()

    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(0),
                                ast.SubroutineExpr(
                                    ast.SubroutineExpr(
                                        ast.IfExpr(
                                            ast.CompExpr(
                                                ast.FeatureRef(23),
                                                ast.NumVal(868.2000000000002),
                                                ast.CompOpType.GT),
                                            ast.NumVal(0.25986931215073095),
                                            ast.NumVal(0.6237178414050242)))),
                                ast.BinNumOpType.ADD),
                            ast.SubroutineExpr(
                                ast.SubroutineExpr(
                                    ast.IfExpr(
                                        ast.CompExpr(
                                            ast.FeatureRef(7),
                                            ast.NumVal(0.05142),
                                            ast.CompOpType.GT),
                                        ast.NumVal(-0.1909605544006228),
                                        ast.NumVal(0.1293965108676673)))),
                            ast.BinNumOpType.ADD)),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)

def test_leaves_cutoff_threshold():
    estimator = xgboost.XGBClassifier(n_estimators=2, random_state=1,
                                      max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator,
                                                 leaves_cutoff_threshold=1)
    actual = assembler.assemble()

    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(-0.0),
                                ast.SubroutineExpr(
                                    ast.SubroutineExpr(
                                        ast.IfExpr(
                                            ast.CompExpr(
                                                ast.FeatureRef(20),
                                                ast.NumVal(16.7950001),
                                                ast.CompOpType.GTE),
                                            ast.NumVal(-0.173057005),
                                            ast.NumVal(0.163440868)))),
                                ast.BinNumOpType.ADD),
                            ast.SubroutineExpr(
                                ast.SubroutineExpr(
                                    ast.IfExpr(
                                        ast.CompExpr(
                                            ast.FeatureRef(27),
                                            ast.NumVal(0.142349988),
                                            ast.CompOpType.GTE),
                                        ast.NumVal(-0.161026895),
                                        ast.NumVal(0.149405137)))),
                            ast.BinNumOpType.ADD)),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)

def test_leaves_cutoff_threshold():
    estimator = lightgbm.LGBMClassifier(n_estimators=2, random_state=1,
                                        max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator,
                                                  leaves_cutoff_threshold=1)
    actual = assembler.assemble()

    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(0),
                                ast.SubroutineExpr(
                                    ast.IfExpr(
                                        ast.CompExpr(
                                            ast.FeatureRef(23),
                                            ast.NumVal(868.2000000000002),
                                            ast.CompOpType.GT),
                                        ast.NumVal(0.2762557140263451),
                                        ast.NumVal(0.6399134166614473))),
                                ast.BinNumOpType.ADD),
                            ast.SubroutineExpr(
                                ast.IfExpr(
                                    ast.CompExpr(
                                        ast.FeatureRef(27),
                                        ast.NumVal(0.14205000000000004),
                                        ast.CompOpType.GT),
                                    ast.NumVal(-0.2139321843285849),
                                    ast.NumVal(0.1151466338793227))),
                            ast.BinNumOpType.ADD)),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)

def test_multi_output():
    expr = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(
                ast.NumVal(1),
                ast.NumVal(1),
                ast.CompOpType.EQ),
            ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
            ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])))

    expected_code = """
namespace ML {
    public static class Model {
        public static double[] Score(double[] input) {
            double[] var0;
            if ((1) == (1)) {
                var0 = new double[2] {1, 2};
            } else {
                var0 = new double[2] {3, 4};
            }
            return var0;
        }
    }
}
"""

    interpreter = CSharpInterpreter()
    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_regression():
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.train_model_regression(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(5),
                        ast.NumVal(6.8455),
                        ast.CompOpType.GT),
                    ast.NumVal(24.007392728914056),
                    ast.NumVal(22.35695742616179)),
                ast.BinNumOpType.ADD),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(12),
                    ast.NumVal(9.63),
                    ast.CompOpType.GT),
                ast.NumVal(-0.4903836928981587),
                ast.NumVal(0.7222498915097475)),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)

def assemble_tree_expr(t):
    assembler = TreeModelAssembler(t)
    return utils.apply_bin_op(
        ast.SubroutineExpr(assembler.assemble()),
        ast.NumVal(coef),
        ast.BinNumOpType.MUL)

def test_multi_output():
    expr = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(
                ast.NumVal(1),
                ast.NumVal(1),
                ast.CompOpType.EQ),
            ast.VectorVal([ast.NumVal(1), ast.NumVal(2)]),
            ast.VectorVal([ast.NumVal(3), ast.NumVal(4)])))

    expected_code = """
Module Model
Function score(ByRef input_vector() As Double) As Double()
    Dim var0() As Double
    If (1) = (1) Then
        Dim var1(1) As Double
        var1(0) = 1
        var1(1) = 2
        var0 = var1
    Else
        Dim var2(1) As Double
        var2(0) = 3
        var2(1) = 4
        var0 = var2
    End If
    score = var0
End Function
End Module
"""

    interpreter = VisualBasicInterpreter()
    utils.assert_code_equal(interpreter.interpret(expr), expected_code)

def test_regression_best_ntree_limit():
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=3, random_state=1,
                                     max_depth=1, base_score=base_score)
    estimator.best_ntree_limit = 2

    utils.train_model_regression(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(12),
                        ast.NumVal(9.72500038),
                        ast.CompOpType.GTE),
                    ast.NumVal(1.67318344),
                    ast.NumVal(2.92757893)),
                ast.BinNumOpType.ADD),
            ast.IfExpr(
                ast.CompExpr(
                    ast.FeatureRef(5),
                    ast.NumVal(6.94099998),
                    ast.CompOpType.GTE),
                ast.NumVal(3.3400948),
                ast.NumVal(1.72118247)),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)

def _assemble_single_output(self, trees, base_score=0):
    if self._tree_limit:
        trees = trees[:self._tree_limit]

    trees_ast = [self._assemble_tree(t) for t in trees]
    result_ast = utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(base_score),
        *trees_ast)

    return ast.SubroutineExpr(result_ast)

def test_single_condition():
    estimator = ensemble.RandomForestRegressor(n_estimators=2, random_state=1)
    estimator.fit([[1], [2]], [1, 2])

    assembler = assemblers.RandomForestModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.BinNumExpr(
        ast.BinNumExpr(
            ast.SubroutineExpr(ast.NumVal(1.0)),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(
                        ast.FeatureRef(0),
                        ast.NumVal(1.5),
                        ast.CompOpType.LTE),
                    ast.NumVal(1.0),
                    ast.NumVal(2.0))),
            ast.BinNumOpType.ADD),
        ast.NumVal(0.5),
        ast.BinNumOpType.MUL)

    assert utils.cmp_exprs(actual, expected)

def kernel_ast(sup_vec_value):
    return ast.SubroutineExpr(
        ast.ExpExpr(
            ast.BinNumExpr(
                negative_gamma_ast,
                ast.PowExpr(
                    ast.BinNumExpr(
                        ast.NumVal(sup_vec_value),
                        ast.FeatureRef(0),
                        ast.BinNumOpType.SUB),
                    ast.NumVal(2)),
                ast.BinNumOpType.MUL)))

def _assemble_single_output(self, trees, base_score=0):
    if self._tree_limit:
        trees = trees[:self._tree_limit]

    trees_ast = [ast.SubroutineExpr(self._assemble_tree(t)) for t in trees]

    to_sum = trees_ast

    # For a large model we need to generate multiple subroutines to avoid
    # Java limitations: https://github.com/BayesWitnesses/m2cgen/issues/103.
    trees_num_leaves = [self._count_leaves(t) for t in trees]
    if sum(trees_num_leaves) > self._leaves_cutoff_threshold:
        to_sum = self._split_into_subroutines(trees_ast, trees_num_leaves)

    tmp_ast = utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(base_score),
        *to_sum)

    result_ast = self._final_transform(tmp_ast)

    return ast.SubroutineExpr(result_ast)

def kernel_ast(sup_vec_value):
    return ast.SubroutineExpr(
        ast.PowExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(estimator.gamma),
                    ast.BinNumExpr(
                        ast.NumVal(sup_vec_value),
                        ast.FeatureRef(0),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.MUL),
                ast.NumVal(0.0),
                ast.BinNumOpType.ADD),
            ast.NumVal(estimator.degree)))

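# A minimal numeric sketch (illustration only) of the single-feature
# polynomial kernel shape the AST above encodes:
# (gamma * <sv, x> + coef0) ** degree, with coef0 = 0.0 here.
# The gamma/degree/input values below are hypothetical.
gamma, degree, coef0 = 0.1, 3, 0.0
sup_vec_value, x = 2.0, 5.0
kernel_value = (gamma * (sup_vec_value * x) + coef0) ** degree
assert abs(kernel_value - 1.0) < 1e-12  # (0.1 * 10.0 + 0.0) ** 3 == 1.0
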
def _build_ast(self):
    coef = utils.to_2d_array(self._get_coef())
    intercept = utils.to_1d_array(self._get_intercept())

    if coef.shape[0] == 1:
        return _linear_to_ast(coef[0], intercept[0])

    exprs = []
    for idx in range(coef.shape[0]):
        exprs.append(
            ast.SubroutineExpr(
                _linear_to_ast(coef[idx], intercept[idx])))
    return ast.VectorVal(exprs)

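# A minimal numeric sketch (illustration only) of the per-class linear score
# that _linear_to_ast expands into AST form: intercept + sum(coef_i * x_i).
# The coefficients and inputs below are hypothetical.
coef_row = [1.0, 2.0]
intercept = 7.0
features = [0.5, 0.25]
score = intercept + sum(c * x for c, x in zip(coef_row, features))
assert score == 8.0  # 7 + 1 * 0.5 + 2 * 0.25
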
def test_multi_class_best_ntree_limit():
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100, random_state=1,
                                      max_depth=1, base_score=base_score)
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    estimator_exp_class1 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(2),
                            ast.NumVal(2.45000005),
                            ast.CompOpType.GTE),
                        ast.NumVal(-0.0733167157),
                        ast.NumVal(0.143414631))),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class2 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(2),
                            ast.NumVal(2.45000005),
                            ast.CompOpType.GTE),
                        ast.NumVal(0.0344139598),
                        ast.NumVal(-0.0717073306))),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    estimator_exp_class3 = ast.ExpExpr(
        ast.SubroutineExpr(
            ast.BinNumExpr(
                ast.NumVal(0.5),
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(
                            ast.FeatureRef(3),
                            ast.NumVal(1.6500001),
                            ast.CompOpType.GTE),
                        ast.NumVal(0.13432835),
                        ast.NumVal(-0.0644444525))),
                ast.BinNumOpType.ADD)),
        to_reuse=True)

    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(estimator_exp_class1, estimator_exp_class2,
                       ast.BinNumOpType.ADD),
        estimator_exp_class3,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)])

    assert utils.cmp_exprs(actual, expected)

def _assemble_single_output(self, estimator_params,
                            base_score=0, split_idx=0):
    estimators_ast = self._assemble_estimators(estimator_params, split_idx)

    tmp_ast = utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(base_score),
        *estimators_ast)

    result_ast = self._final_transform(tmp_ast)

    return ast.SubroutineExpr(result_ast)

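# A self-contained sketch (hypothetical names, not library code) of the fold
# that apply_op_to_expressions performs over ADD above: the base score and
# every estimator output are combined left to right into one running sum.
from functools import reduce

def apply_add(*values):
    return reduce(lambda left, right: left + right, values)

# base_score of 0.5 plus two hypothetical estimator outputs:
assert apply_add(0.5, 1.25, -0.75) == 1.0
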
def _assemble_estimators(self, trees, split_idx):
    if self._tree_limit:
        trees = trees[:self._tree_limit]

    trees_ast = [ast.SubroutineExpr(self._assemble_tree(t)) for t in trees]

    # For a large model we need to generate multiple subroutines to avoid
    # Java limitations: https://github.com/BayesWitnesses/m2cgen/issues/103.
    trees_num_leaves = [self._count_leaves(t) for t in trees]
    if sum(trees_num_leaves) > self._leaves_cutoff_threshold:
        return self._split_into_subroutines(trees_ast, trees_num_leaves)

    return trees_ast

def _rbf_kernel_ast(estimator, sup_vec_value, to_reuse=False):
    negative_gamma_ast = ast.BinNumExpr(
        ast.NumVal(0),
        ast.NumVal(estimator.gamma),
        ast.BinNumOpType.SUB,
        to_reuse=True)

    return ast.SubroutineExpr(
        ast.ExpExpr(
            ast.BinNumExpr(
                negative_gamma_ast,
                ast.PowExpr(
                    ast.BinNumExpr(
                        ast.NumVal(sup_vec_value),
                        ast.FeatureRef(0),
                        ast.BinNumOpType.SUB),
                    ast.NumVal(2)),
                ast.BinNumOpType.MUL)),
        to_reuse=to_reuse)

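# A minimal numeric sketch (illustration only) of the single-feature RBF
# kernel the AST above encodes: exp(-gamma * (sv - x) ** 2). The values below
# are hypothetical.
import math

gamma = 0.5
sup_vec_value, x = 3.0, 1.0
kernel_value = math.exp(-gamma * (sup_vec_value - x) ** 2)
assert abs(kernel_value - math.exp(-2.0)) < 1e-12  # -0.5 * (2.0 ** 2) == -2.0
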
def test_linear_model():
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     feature_selector="shuffle",
                                     booster="gblinear")
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.00999326),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.0520094),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(0.10447),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(0.17387),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(0.691745),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(0.296357),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(0.0288206),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(0.417822),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.0551116),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(0.00242449),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(0.109585),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.00744202),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(0.0731089),
                       ast.BinNumOpType.MUL),
    ]

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            assemblers.utils.apply_op_to_expressions(
                ast.BinNumOpType.ADD,
                ast.NumVal(3.13109),
                *feature_weight_mul),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)

def test_binary_classification():
    estimator = xgboost.XGBClassifier(n_estimators=2, random_state=1,
                                      max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(-0.0),
                                ast.IfExpr(
                                    ast.CompExpr(
                                        ast.FeatureRef(20),
                                        ast.NumVal(16.7950001),
                                        ast.CompOpType.GTE),
                                    ast.NumVal(-0.17062147),
                                    ast.NumVal(0.1638484)),
                                ast.BinNumOpType.ADD),
                            ast.IfExpr(
                                ast.CompExpr(
                                    ast.FeatureRef(27),
                                    ast.NumVal(0.142349988),
                                    ast.CompOpType.GTE),
                                ast.NumVal(-0.16087772),
                                ast.NumVal(0.149866998)),
                            ast.BinNumOpType.ADD)),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)

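# A minimal numeric sketch (illustration only) of the probability pair the
# expected AST above encodes: sigmoid(s) = 1 / (1 + exp(0 - s)) for the raw
# score s, with class probabilities (1 - sigmoid(s), sigmoid(s)). The score
# below sums the leaf values from one possible path through the two trees.
import math

raw_score = -0.0 + (-0.17062147) + (-0.16087772)  # both GTE branches taken
sigmoid = 1 / (1 + math.exp(0 - raw_score))
probabilities = [1 - sigmoid, sigmoid]
assert abs(sum(probabilities) - 1) < 1e-12
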