Example #1
    def testRankingDontThrowExceptionForForEstimator(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        est = estimator.CoreGradientBoostedDecisionTreeRanker(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[
                core_feature_column.numeric_column("f1"),
                core_feature_column.numeric_column("f2")
            ],
            ranking_model_pair_keys=("a", "b"))

        # Train for a few steps.
        est.train(input_fn=_ranking_train_input_fn, steps=1000)
        est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
        est.predict(input_fn=_infer_ranking_train_input_fn)
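
These snippets are excerpts from TensorFlow 1.x contrib tests and rely on module-level imports. A hedged, partial sketch of the import block they assume (exact aliases and paths may differ in the original modules):

import os
import tempfile

import numpy as np
import tensorflow as tf

from tensorflow.contrib.boosted_trees.estimator_batch import estimator
from tensorflow.contrib.boosted_trees.proto import learner_pb2
from tensorflow.contrib.boosted_trees.proto import tree_config_pb2
from tensorflow.contrib.layers.python.layers import feature_column as contrib_feature_column
from tensorflow.contrib.learn.python.learn.estimators import run_config
from tensorflow.python.estimator.canned import head as head_lib
from tensorflow.python.feature_column import feature_column_lib as core_feature_column
from tensorflow.python.ops.losses import losses
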
Example #2
def _get_estimator(output_dir, feature_cols):
    """Configures DNNBoostedTreeCombinedRegressor based on flags."""
    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = (
        FLAGS.tree_learning_rate)
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = FLAGS.tree_l2
    learner_config.constraints.max_tree_depth = FLAGS.tree_depth

    run_config = tf.contrib.learn.RunConfig(save_summary_steps=1)

    # Create a DNNBoostedTreeCombinedRegressor estimator.
    estimator = DNNBoostedTreeCombinedRegressor(
        dnn_hidden_units=[int(x) for x in FLAGS.dnn_hidden_units.split(",")],
        dnn_feature_columns=feature_cols,
        tree_learner_config=learner_config,
        num_trees=FLAGS.num_trees,
        # This should be set to the number of examples used to grow each
        # layer; for large datasets it can exceed the batch size.
        tree_examples_per_layer=FLAGS.batch_size,
        model_dir=output_dir,
        config=run_config,
        dnn_input_layer_to_tree=True,
        dnn_steps_to_train=FLAGS.dnn_steps_to_train)
    return estimator
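
_get_estimator reads several command-line flags. A hypothetical set of flag definitions matching the names it reads (defaults here are illustrative, not from the original):

flags = tf.app.flags
flags.DEFINE_string("dnn_hidden_units", "100,50", "Comma-separated hidden layer sizes.")
flags.DEFINE_float("tree_learning_rate", 0.1, "Shrinkage applied to each tree.")
flags.DEFINE_float("tree_l2", 1.0, "L2 regularization on leaf weights.")
flags.DEFINE_integer("tree_depth", 4, "Maximum depth of each tree.")
flags.DEFINE_integer("num_trees", 100, "Number of trees to grow.")
flags.DEFINE_integer("batch_size", 1000, "Batch size; also reused as examples per layer.")
flags.DEFINE_integer("dnn_steps_to_train", 1000, "Steps to train the DNN before the trees.")
FLAGS = flags.FLAGS
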
Example #3
    def testFitAndEvaluateMultiClassFullDontThrowException(self):
        n_classes = 3
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = n_classes
        learner_config.constraints.max_tree_depth = 1
        learner_config.multi_class_strategy = (
            learner_pb2.LearnerConfig.FULL_HESSIAN)

        head_fn = estimator.core_multiclass_head(n_classes=n_classes)

        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
            learner_config=learner_config,
            head=head_fn,
            num_trees=1,
            center_bias=False,
            examples_per_layer=7,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")])

        classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
        classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
        classifier.predict(input_fn=_eval_input_fn)
Example #4
def _get_tfbt(output_dir):
    """Configures TF Boosted Trees estimator based on flags."""
    learner_config = learner_pb2.LearnerConfig()

    num_classes = 10

    learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    learner_config.num_classes = num_classes
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer
    learner_config.constraints.max_tree_depth = FLAGS.depth

    growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
    learner_config.growing_mode = growing_mode
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    learner_config.multi_class_strategy = (
        learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

    # Create a TF Boosted trees estimator that can take in custom loss.
    estimator = GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        n_classes=num_classes,
        examples_per_layer=FLAGS.examples_per_layer,
        model_dir=output_dir,
        num_trees=FLAGS.num_trees,
        center_bias=False,
        config=run_config)
    return estimator
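
A minimal usage sketch for the estimator above, assuming hypothetical train_input_fn/eval_input_fn that yield (features, labels) batches for the 10-class problem; the original script presumably wires this up through tf.contrib.learn's Experiment machinery instead:

est = _get_tfbt("/tmp/tfbt_model")
est.fit(input_fn=train_input_fn, steps=1000)
print(est.evaluate(input_fn=eval_input_fn, steps=1))
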
Example #5
    def testQuantileRegression(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 6
        learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
        learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.tree_complexity = (
            1.0 / _QUANTILE_REGRESSION_SIZE)

        train_input_fn, test_input_fn, y = _quantile_regression_input_fns()

        # 95% percentile.
        model_upper = estimator.GradientBoostedDecisionTreeQuantileRegressor(
            quantiles=[0.95],
            learner_config=learner_config,
            num_trees=12,
            examples_per_layer=_QUANTILE_REGRESSION_SIZE,
            center_bias=False)

        model_upper.fit(input_fn=train_input_fn, steps=1000)
        result_iter = model_upper.predict(input_fn=test_input_fn)
        upper = []
        for prediction_dict in result_iter:
            upper.append(prediction_dict["scores"])

        frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
        # The 0.95-quantile model should leave roughly 95% of labels below
        # its predictions; allow +/- 3%.
        self.assertTrue(frac_below_upper >= 0.92)
        self.assertTrue(frac_below_upper <= 0.98)
Example #6
    def testFitAndEvaluateMultiClassFullDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 3
        learner_config.constraints.max_tree_depth = 1
        learner_config.multi_class_strategy = (
            learner_pb2.LearnerConfig.FULL_HESSIAN)

        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            n_classes=learner_config.num_classes,
            num_trees=1,
            examples_per_layer=7,
            model_dir=model_dir,
            config=config,
            center_bias=False,
            feature_columns=[contrib_feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
        classifier.export(self._export_dir_base)
        result_iter = classifier.predict(input_fn=_eval_input_fn)
        for prediction_dict in result_iter:
            self.assertTrue("classes" in prediction_dict)
Example #7
    def testCreate(self):
        with self.cached_session():
            tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            tree = tree_ensemble_config.trees.add()
            _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
            tree_ensemble_config.tree_weights.append(1.0)

            # Prepare learner config.
            learner_config = learner_pb2.LearnerConfig()
            learner_config.num_classes = 2

            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=3,
                tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                name="create_tree")
            resources.initialize_resources(resources.shared_resources()).run()

            result, _ = prediction_ops.gradient_trees_prediction(
                tree_ensemble_handle,
                self._seed, [self._dense_float_tensor],
                [self._sparse_float_indices1, self._sparse_float_indices2],
                [self._sparse_float_values1, self._sparse_float_values2],
                [self._sparse_float_shape1, self._sparse_float_shape2],
                [self._sparse_int_indices1], [self._sparse_int_values1],
                [self._sparse_int_shape1],
                learner_config=learner_config.SerializeToString(),
                apply_dropout=False,
                apply_averaging=False,
                center_bias=False,
                reduce_dim=True)
            self.assertAllClose(result.eval(), [[-0.4], [-0.4]])
            stamp_token = model_ops.tree_ensemble_stamp_token(
                tree_ensemble_handle)
            self.assertEqual(stamp_token.eval(), 3)
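
To read the assertion above: the ensemble holds a single tree whose only node is a leaf with payload -0.4 for class 0, and with num_classes = 2 plus reduce_dim=True the op emits one logit per example, so both examples in the two-row batch come out as [-0.4].
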
Example #8
    def testTrainEvaluateInferDoesNotThrowErrorWithDnnInput(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 3
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreDNNBoostedTreeCombinedEstimator(
            head=head_fn,
            dnn_hidden_units=[1],
            dnn_feature_columns=[core_feature_column.numeric_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=True,
            tree_feature_columns=[])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        res = est.evaluate(input_fn=_eval_input_fn, steps=1)
        self.assertLess(0.5, res["auc"])
        est.predict(input_fn=_eval_input_fn)
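
As the parameter names suggest, dnn_input_layer_to_tree=True feeds the DNN's input layer to the boosted trees as their features, which is why tree_feature_columns can stay empty here; the DNN alone is trained for the first dnn_steps_to_train steps before tree growing begins.
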
Example #9
    def testWeightedCategoricalColumn(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        feature_columns = [
            core_feature_column.weighted_categorical_column(
                categorical_column=core_feature_column.
                categorical_column_with_vocabulary_list(
                    key="word", vocabulary_list=["the", "cat", "dog"]),
                weight_feature_key="weight")
        ]

        labels = np.array([[1], [1], [0], [0.]], dtype=np.float32)

        def _make_input_fn():
            def _input_fn():
                features_dict = {}
                # Sparse tensor representing
                # example 0: "the", "cat"
                # example 1: "dog"
                # example 2: (no words)
                # example 3: "the"
                # Word weights: "the"=1, "cat"=5, "dog"=6.
                features_dict["word"] = sparse_tensor.SparseTensor(
                    indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
                    values=constant_op.constant(["the", "cat", "dog", "the"],
                                                dtype=dtypes.string),
                    dense_shape=[4, 3])
                features_dict["weight"] = sparse_tensor.SparseTensor(
                    indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
                    values=[1., 5., 6., 1.],
                    dense_shape=[4, 3])
                return features_dict, labels

            return _input_fn

        est = estimator.CoreGradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=feature_columns)

        input_fn = _make_input_fn()
        est.train(input_fn=input_fn, steps=100)
        est.evaluate(input_fn=input_fn, steps=1)
        est.predict(input_fn=input_fn)
Example #10
    def testNoDNNFeatureColumns(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2

        with self.assertRaisesRegexp(ValueError,
                                     "dnn_feature_columns must be specified"):
            classifier = estimator.DNNBoostedTreeCombinedClassifier(
                dnn_hidden_units=[1],
                dnn_feature_columns=[],
                tree_learner_config=learner_config,
                num_trees=1,
                tree_examples_per_layer=3,
                n_classes=2)
            classifier.fit(input_fn=_train_input_fn, steps=5)
Example #11
    def testFitAndEvaluateDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
        classifier.export(self._export_dir_base)
Example #12
    def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 3
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreDNNBoostedTreeCombinedEstimator(
            head=head_fn,
            dnn_hidden_units=[1],
            dnn_feature_columns=[core_feature_column.numeric_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=True,
            predict_with_tree_only=True,
            dnn_to_tree_distillation_param=(0.5, None),
            tree_feature_columns=[])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        res = est.evaluate(input_fn=_eval_input_fn, steps=1)
        self.assertLess(0.5, res["auc"])
        est.predict(input_fn=_eval_input_fn)
        serving_input_fn = (export.build_parsing_serving_input_receiver_fn(
            feature_spec={
                "x": parsing_ops.FixedLenFeature([1], dtype=dtypes.float32)
            }))
        base_exporter = exporter.FinalExporter(
            name="Servo",
            serving_input_receiver_fn=serving_input_fn,
            assets_extra=None)
        export_path = os.path.join(model_dir, "export")
        base_exporter.export(est,
                             export_path=export_path,
                             checkpoint_path=None,
                             eval_result={},
                             is_the_final_export=True)
Example #13
    def testOverridesGlobalSteps(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 2
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=False,
            override_global_step_value=10000000)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        self._assert_checkpoint(classifier.model_dir, global_step=10000000)
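
The point of override_global_step_value is that once the ensemble stops growing, the global step is forcibly set to the given value, so even though fit ran only briefly the checkpoint reports 10000000; this is useful when downstream tooling expects a "fully trained" step count.
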
Example #14
    def testFitAndEvaluateDontThrowExceptionWithCoreForRegressor(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        regressor = estimator.GradientBoostedDecisionTreeRegressor(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        regressor.fit(input_fn=_train_input_fn, steps=15)
        regressor.evaluate(input_fn=_eval_input_fn, steps=1)
        regressor.export(self._export_dir_base)
Example #15
    def testThatLeafIndexIsInPredictions(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=True)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        result_iter = classifier.predict(input_fn=_eval_input_fn)
        for prediction_dict in result_iter:
            self.assertTrue("leaf_index" in prediction_dict)
            self.assertTrue("logits" in prediction_dict)
Example #16
    def testFitAndEvaluateDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.DNNBoostedTreeCombinedClassifier(
            dnn_hidden_units=[1],
            dnn_feature_columns=[feature_column.real_valued_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            n_classes=2,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=False,
            tree_feature_columns=[feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
Example #17
def _get_tfbt(output_dir, feature_cols):
    """Configures TF Boosted Trees estimator based on flags."""
    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = FLAGS.l2
    learner_config.constraints.max_tree_depth = FLAGS.depth

    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    # Create a TF Boosted trees regression estimator.
    estimator = GradientBoostedDecisionTreeRegressor(
        learner_config=learner_config,
        # This should be set to the number of examples used to grow each
        # layer; for large datasets it can exceed the batch size.
        examples_per_layer=FLAGS.batch_size,
        feature_columns=feature_cols,
        label_dimension=1,
        model_dir=output_dir,
        num_trees=FLAGS.num_trees,
        center_bias=False,
        config=run_config)
    return estimator
Example #18
    def testTrainEvaluateInferDoesNotThrowError(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreGradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        est.evaluate(input_fn=_eval_input_fn, steps=1)
        est.predict(input_fn=_eval_input_fn)
Example #19
    def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        # Use core head
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

        model = estimator.GradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        model.fit(input_fn=_train_input_fn, steps=15)
        model.evaluate(input_fn=_eval_input_fn, steps=1)
        model.export(self._export_dir_base)
Example #20
    def testQuantileRegressionDoesNotThroughException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
        learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.tree_complexity = (
            1.0 / _QUANTILE_REGRESSION_SIZE)

        train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
        y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

        # 95% percentile.
        model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
            quantiles=[0.95],
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=_QUANTILE_REGRESSION_SIZE,
            center_bias=False)

        model_upper.train(input_fn=train_input_fn, steps=1000)
        result_iter = model_upper.predict(input_fn=test_input_fn)
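
A hedged companion sketch: pairing the 0.95-quantile model above with a 0.05-quantile one built the same way would give an approximate 90% prediction interval.

model_lower = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
    quantiles=[0.05],
    learner_config=learner_config,
    num_trees=1,
    examples_per_layer=_QUANTILE_REGRESSION_SIZE,
    center_bias=False)
model_lower.train(input_fn=train_input_fn, steps=1000)
# The "scores" key mirrors the earlier non-core example; the core
# estimator's prediction dict may use a different key.
lower = [p["scores"] for p in model_lower.predict(input_fn=test_input_fn)]
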
Example #21
    def testRestore(self):
        # Calling self.cached_session() without a graph specified results in
        # TensorFlowTestCase caching the session and returning the same one
        # every time. In this test we need two different sessions, so we
        # create a fresh graph for each and pass it to self.session() to
        # ensure no caching occurs under the hood.
        save_path = os.path.join(self.get_temp_dir(), "restore-test")
        with ops.Graph().as_default() as graph:
            with self.session(graph) as sess:
                # Prepare learner config.
                learner_config = learner_pb2.LearnerConfig()
                learner_config.num_classes = 2

                # Add the first tree and save.
                tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig(
                )
                tree = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_metadata.add().is_finalized = True
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)
                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=3,
                    tree_ensemble_config=tree_ensemble_config.
                    SerializeToString(),
                    name="restore_tree")
                resources.initialize_resources(
                    resources.shared_resources()).run()
                variables.global_variables_initializer().run()
                my_saver = saver.Saver()

                # Add the second tree and replace the ensemble of the handle.
                tree2 = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)
                # Predict to confirm.
                with ops.control_dependencies([
                        model_ops.tree_ensemble_deserialize(
                            tree_ensemble_handle,
                            stamp_token=3,
                            tree_ensemble_config=tree_ensemble_config.
                            SerializeToString())
                ]):
                    result, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose([[-1.1], [-1.1]], result.eval())
                # Save before adding other trees.
                val = my_saver.save(sess, save_path)
                self.assertEqual(save_path, val)

                # Add more trees after saving.
                tree3 = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)
                # Predict to confirm.
                with ops.control_dependencies([
                        model_ops.tree_ensemble_deserialize(
                            tree_ensemble_handle,
                            stamp_token=3,
                            tree_ensemble_config=tree_ensemble_config.
                            SerializeToString())
                ]):
                    result, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

        # Start a second session.  In that session the parameter nodes
        # have not been initialized either.
        with ops.Graph().as_default() as graph:
            with self.session(graph) as sess:
                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=0,
                    tree_ensemble_config="",
                    name="restore_tree")
                my_saver = saver.Saver()
                my_saver.restore(sess, save_path)
                result, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
                # Make sure we only have the first and second tree.
                # The third tree was added after the save.
                self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
Example #22
    def testSerialization(self):
        with ops.Graph().as_default() as graph:
            with self.session(graph):
                tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig(
                )
                # Bias tree only for second class.
                tree1 = tree_ensemble_config.trees.add()
                _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)

                tree_ensemble_config.tree_weights.append(1.0)

                # Depth 2 tree.
                tree2 = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _set_float_split(
                    tree2.nodes.add().sparse_float_binary_split_default_right.
                    split, 1, 4.0, 1, 2)
                _set_float_split(tree2.nodes.add().dense_float_binary_split, 0,
                                 9.0, 3, 4)
                _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
                _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
                _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=7,
                    tree_ensemble_config=tree_ensemble_config.
                    SerializeToString(),
                    name="saver_tree")
                stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
                    tree_ensemble_handle)
                resources.initialize_resources(
                    resources.shared_resources()).run()
                self.assertEqual(stamp_token.eval(), 7)
                serialized_config = serialized_config.eval()

        with ops.Graph().as_default() as graph:
            with self.session(graph):
                tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
                    stamp_token=9,
                    tree_ensemble_config=serialized_config,
                    name="saver_tree2")
                resources.initialize_resources(
                    resources.shared_resources()).run()

                # Prepare learner config.
                learner_config = learner_pb2.LearnerConfig()
                learner_config.num_classes = 3

                result, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle2,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)

                # Re-serialize tree.
                stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
                    tree_ensemble_handle2)

                # The first example gets the class-1 bias of -0.2 from the
                # first tree plus the leaf-2 payload of 0.5 for class 0 (its
                # sparse feature is missing), hence [0.5, -0.2]. The second
                # example gets the same class-1 bias of -0.2 plus the leaf-3
                # class-1 payload of 1.2, hence [0.0, 1.0].
                self.assertEqual(stamp_token2.eval(), 9)

                # Class 2 has no scores in any leaf => it gets score 0.
                self.assertEqual(serialized_config2.eval(), serialized_config)
                self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])
Example #23
    def testForcedInitialSplits(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 3

        initial_subtree = """
            nodes {
              dense_float_binary_split {
                feature_column: 0
                threshold: -0.5
                left_id: 1
                right_id: 2
              }
              node_metadata {
                gain: 0
              }
            }
            nodes {
              dense_float_binary_split {
                feature_column: 1
                threshold: 0.52
                left_id: 3
                right_id: 4
              }
              node_metadata {
                gain: 0
              }
            }
            nodes {
              dense_float_binary_split {
                feature_column: 1
                threshold: 0.554
                left_id: 5
                right_id: 6
              }
              node_metadata {
                gain: 0
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
    """
        tree_proto = tree_config_pb2.DecisionTreeConfig()
        text_format.Merge(initial_subtree, tree_proto)

        # Set initial subtree info.
        learner_config.each_tree_start.CopyFrom(tree_proto)
        learner_config.each_tree_start_num_layers = 2

        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=2,
            examples_per_layer=6,
            model_dir=model_dir,
            config=config,
            center_bias=False,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=False)

        classifier.fit(input_fn=_train_input_fn, steps=100)
        # With no global-step override, the checkpoint records the actual
        # number of training steps.
        ensemble = self._assert_checkpoint_and_return_model(
            classifier.model_dir, global_step=6)

        # TODO(nponomareva): find a better way to test this.
        expected_ensemble = """
      trees {
        nodes {
          dense_float_binary_split {
            threshold: -0.5
            left_id: 1
            right_id: 2
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.52
            left_id: 3
            right_id: 4
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.554
            left_id: 5
            right_id: 6
          }
          node_metadata {
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          dense_float_binary_split {
            threshold: 1.0
            left_id: 7
            right_id: 8
          }
          node_metadata {
            gain: 0.888888895512
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: -2.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 2.00000023842
            }
          }
        }
      }
      trees {
        nodes {
          dense_float_binary_split {
            threshold: -0.5
            left_id: 1
            right_id: 2
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.52
            left_id: 3
            right_id: 4
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.554
            left_id: 5
            right_id: 6
          }
          node_metadata {
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          dense_float_binary_split {
            threshold: 1.0
            left_id: 7
            right_id: 8
          }
          node_metadata {
            gain: 0.727760672569
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: -1.81873059273
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 1.81873047352
            }
          }
        }
      }
      trees {
        nodes {
          dense_float_binary_split {
            threshold: -0.5
            left_id: 1
            right_id: 2
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.52
            left_id: 3
            right_id: 4
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.554
            left_id: 5
            right_id: 6
          }
          node_metadata {
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
      }
      tree_weights: 0.10000000149
      tree_weights: 0.10000000149
      tree_weights: 0.10000000149
      tree_metadata {
        num_tree_weight_updates: 1
        num_layers_grown: 3
        is_finalized: true
      }
      tree_metadata {
        num_tree_weight_updates: 1
        num_layers_grown: 3
        is_finalized: true
      }
      tree_metadata {
        num_tree_weight_updates: 1
        num_layers_grown: 2
      }
      growing_metadata {
        num_layers_attempted: 3
      }
    """
        self.assertProtoEquals(expected_ensemble, ensemble)
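
In this final example, each_tree_start seeds every new tree with the two forced layers from initial_subtree, and each_tree_start_num_layers tells the learner those layers are already grown; with max_tree_depth = 3, only one extra layer is learned per tree, which is why the expected ensemble repeats the forced splits verbatim and the trees differ only in their learned third-layer split and leaf values.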