Example #1
0
    def testCreate(self):
        with self.test_session():
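            # Build an ensemble config holding one single-leaf tree of weight
            # 1.0; the leaf contributes -0.4 to class 0.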
            tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            tree = tree_ensemble_config.trees.add()
            _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
            tree_ensemble_config.tree_weights.append(1.0)

            # Prepare learner config.
            learner_config = learner_pb2.LearnerConfig()
            learner_config.num_classes = 2

            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=3,
                tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                name="create_tree")
            resources.initialize_resources(resources.shared_resources()).run()

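            # With a single -0.4 leaf, every example in the batch should get a
            # prediction of -0.4.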
            result, _, _ = prediction_ops.gradient_trees_prediction(
                tree_ensemble_handle,
                self._seed, [self._dense_float_tensor],
                [self._sparse_float_indices1, self._sparse_float_indices2],
                [self._sparse_float_values1, self._sparse_float_values2],
                [self._sparse_float_shape1, self._sparse_float_shape2],
                [self._sparse_int_indices1], [self._sparse_int_values1],
                [self._sparse_int_shape1],
                learner_config=learner_config.SerializeToString(),
                apply_dropout=False,
                apply_averaging=False,
                center_bias=False,
                reduce_dim=True)
            self.assertAllClose(result.eval(), [[-0.4], [-0.4]])
            stamp_token = model_ops.tree_ensemble_stamp_token(
                tree_ensemble_handle)
            self.assertEqual(stamp_token.eval(), 3)
Example #2
0
  def testCreate(self):
    with self.cached_session():
      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
      tree = tree_ensemble_config.trees.add()
      _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
      tree_ensemble_config.tree_weights.append(1.0)

      # Prepare learner config.
      learner_config = learner_pb2.LearnerConfig()
      learner_config.num_classes = 2

      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=3,
          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
          name="create_tree")
      resources.initialize_resources(resources.shared_resources()).run()

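      # This version of the op returns (predictions, dropout_info) rather than
      # the three tensors unpacked in Example #1.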
      result, _ = prediction_ops.gradient_trees_prediction(
          tree_ensemble_handle,
          self._seed, [self._dense_float_tensor], [
              self._sparse_float_indices1, self._sparse_float_indices2
          ], [self._sparse_float_values1, self._sparse_float_values2],
          [self._sparse_float_shape1,
           self._sparse_float_shape2], [self._sparse_int_indices1],
          [self._sparse_int_values1], [self._sparse_int_shape1],
          learner_config=learner_config.SerializeToString(),
          apply_dropout=False,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)
      self.assertAllClose(result.eval(), [[-0.4], [-0.4]])
      stamp_token = model_ops.tree_ensemble_stamp_token(tree_ensemble_handle)
      self.assertEqual(stamp_token.eval(), 3)
Example #3
0
    def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode):
        """Runs prediction and returns a dictionary of the prediction results.

        Args:
          ensemble_handle: ensemble resource handle.
          ensemble_stamp: stamp of ensemble resource.
          mode: learn.ModeKeys.TRAIN or EVAL or INFER.

        Returns:
          a dictionary of prediction results -
            ENSEMBLE_STAMP, PREDICTION, PARTITION_IDS,
            NUM_LAYERS_ATTEMPTED, NUM_TREES_ATTEMPTED.
        """
        ensemble_stats = training_ops.tree_ensemble_stats(
            ensemble_handle, ensemble_stamp)
        num_handlers = (len(self._dense_floats) +
                        len(self._sparse_float_shapes) +
                        len(self._sparse_int_shapes))
        # Used during feature selection.
        used_handlers = model_ops.tree_ensemble_used_handlers(
            ensemble_handle, ensemble_stamp, num_all_handlers=num_handlers)

        # We don't need dropout info - we can always restore it based on the
        # seed.
        apply_dropout, seed = _dropout_params(mode, ensemble_stats)
        # Make sure ensemble stats run. This will check that the ensemble has
        # the right stamp.
        with ops.control_dependencies(ensemble_stats):
            predictions, _ = prediction_ops.gradient_trees_prediction(
                ensemble_handle,
                seed,
                self._dense_floats,
                self._sparse_float_indices,
                self._sparse_float_values,
                self._sparse_float_shapes,
                self._sparse_int_indices,
                self._sparse_int_values,
                self._sparse_int_shapes,
                learner_config=self._learner_config_serialized,
                apply_dropout=apply_dropout,
                apply_averaging=mode != learn.ModeKeys.TRAIN,
                use_locking=True,
                center_bias=self._center_bias,
                reduce_dim=self._reduce_dim)
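            # Partition ids assign each example to the tree partition it falls
            # into, for aggregating per-partition statistics during training.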
            partition_ids = prediction_ops.gradient_trees_partition_examples(
                ensemble_handle,
                self._dense_floats,
                self._sparse_float_indices,
                self._sparse_float_values,
                self._sparse_float_shapes,
                self._sparse_int_indices,
                self._sparse_int_values,
                self._sparse_int_shapes,
                use_locking=True)

        return _make_predictions_dict(ensemble_stamp, predictions,
                                      partition_ids, ensemble_stats,
                                      used_handlers)
Example #4
0
  def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode):
    """Runs prediction and returns a dictionary of the prediction results.

    Args:
      ensemble_handle: ensemble resource handle.
      ensemble_stamp: stamp of ensemble resource.
      mode: learn.ModeKeys.TRAIN or EVAL or INFER.

    Returns:
      a dictionary of prediction results -
        ENSEMBLE_STAMP, PREDICTION, PARTITION_IDS,
        NUM_LAYERS_ATTEMPTED, NUM_TREES_ATTEMPTED.
    """
    ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle,
                                                      ensemble_stamp)
    num_handlers = (
        len(self._dense_floats) + len(self._sparse_float_shapes) +
        len(self._sparse_int_shapes))
    # Used during feature selection.
    used_handlers = model_ops.tree_ensemble_used_handlers(
        ensemble_handle, ensemble_stamp, num_all_handlers=num_handlers)

    # We don't need dropout info - we can always restore it based on the
    # seed.
    apply_dropout, seed = _dropout_params(mode, ensemble_stats)
    # Make sure ensemble stats run. This will check that the ensemble has
    # the right stamp.
    with ops.control_dependencies(ensemble_stats):
      predictions, _ = prediction_ops.gradient_trees_prediction(
          ensemble_handle,
          seed,
          self._dense_floats,
          self._sparse_float_indices,
          self._sparse_float_values,
          self._sparse_float_shapes,
          self._sparse_int_indices,
          self._sparse_int_values,
          self._sparse_int_shapes,
          learner_config=self._learner_config_serialized,
          apply_dropout=apply_dropout,
          apply_averaging=mode != learn.ModeKeys.TRAIN,
          use_locking=True,
          center_bias=self._center_bias,
          reduce_dim=self._reduce_dim)
      partition_ids = prediction_ops.gradient_trees_partition_examples(
          ensemble_handle,
          self._dense_floats,
          self._sparse_float_indices,
          self._sparse_float_values,
          self._sparse_float_shapes,
          self._sparse_int_indices,
          self._sparse_int_values,
          self._sparse_int_shapes,
          use_locking=True)

    return _make_predictions_dict(ensemble_stamp, predictions, partition_ids,
                                  ensemble_stats, used_handlers)
Example #5
0
  def _get_predictions(self,
                       tree_ensemble_handle,
                       learner_config,
                       apply_dropout=False,
                       apply_averaging=False,
                       center_bias=False,
                       reduce_dim=False):
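    """Builds a gradient_trees_prediction op over the test fixture's inputs."""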
    return prediction_ops.gradient_trees_prediction(
        tree_ensemble_handle,
        self._seed, [self._dense_float_tensor],
        [self._sparse_float_indices1, self._sparse_float_indices2],
        [self._sparse_float_values1, self._sparse_float_values2],
        [self._sparse_float_shape1, self._sparse_float_shape2],
        [self._sparse_int_indices1], [self._sparse_int_values1],
        [self._sparse_int_shape1],
        learner_config=learner_config,
        apply_dropout=apply_dropout,
        apply_averaging=apply_averaging,
        center_bias=center_bias,
        reduce_dim=reduce_dim)
Example #6
0
  def predict(self, mode):
    """Returns predictions given the features and mode.

    Args:
      mode: Mode the graph is running in (train|predict|eval).

    Returns:
      A dict of predictions tensors.

    Raises:
      ValueError: if input tensors are missing or not on the same device.
    """
    apply_averaging = mode != learn.ModeKeys.TRAIN

    # Use the current ensemble to predict on the current batch of input.
    # For faster prediction we check if the inputs are on the same device
    # as the model. If not, we create a copy of the model on the worker.
    input_deps = (self._dense_floats + self._sparse_float_indices +
                  self._sparse_int_indices)
    if not input_deps:
      raise ValueError("No input tensors for prediction.")

    if any(i.device != input_deps[0].device for i in input_deps):
      raise ValueError("All input tensors should be on the same device.")

    # Get most current model stamp.
    ensemble_stamp = model_ops.tree_ensemble_stamp_token(self._ensemble_handle)

    # Determine if ensemble is colocated with the inputs.
    if self._ensemble_handle.device != input_deps[0].device:
      # Create a local ensemble and get its local stamp.
      with ops.name_scope("local_ensemble", "TreeEnsembleVariable") as name:
        local_ensemble_handle = (
            gen_model_ops.decision_tree_ensemble_resource_handle_op(name=name))
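        # stamp_token=-1 should never match a real ensemble stamp, so the
        # cond below refreshes the local copy on first use.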
        create_op = gen_model_ops.create_tree_ensemble_variable(
            local_ensemble_handle, stamp_token=-1, tree_ensemble_config="")
        with ops.control_dependencies([create_op]):
          local_stamp = model_ops.tree_ensemble_stamp_token(
              local_ensemble_handle)

      # Determine whether the local ensemble is stale and update it if needed.
      def _refresh_local_ensemble_fn():
        # Serialize the model from parameter server after reading all inputs.
        with ops.control_dependencies(input_deps):
          (ensemble_stamp, serialized_model) = (
              model_ops.tree_ensemble_serialize(self._ensemble_handle))

        # Update local ensemble with the serialized model from parameter server.
        with ops.control_dependencies([create_op]):
          return model_ops.tree_ensemble_deserialize(
              local_ensemble_handle,
              stamp_token=ensemble_stamp,
              tree_ensemble_config=serialized_model), ensemble_stamp

      refresh_local_ensemble, ensemble_stamp = control_flow_ops.cond(
          math_ops.not_equal(ensemble_stamp,
                             local_stamp), _refresh_local_ensemble_fn,
          lambda: (control_flow_ops.no_op(), ensemble_stamp))

      # Once updated, use the local model for prediction.
      with ops.control_dependencies([refresh_local_ensemble]):
        ensemble_stats = training_ops.tree_ensemble_stats(
            local_ensemble_handle, ensemble_stamp)
        apply_dropout, seed = _dropout_params(mode, ensemble_stats)
        # We don't need dropout info - we can always restore it based on the
        # seed.
        predictions, predictions_no_dropout, _ = (
            prediction_ops.gradient_trees_prediction(
                local_ensemble_handle,
                seed,
                self._dense_floats,
                self._sparse_float_indices,
                self._sparse_float_values,
                self._sparse_float_shapes,
                self._sparse_int_indices,
                self._sparse_int_values,
                self._sparse_int_shapes,
                learner_config=self._learner_config_serialized,
                apply_dropout=apply_dropout,
                apply_averaging=apply_averaging,
                use_locking=False,
                center_bias=self._center_bias,
                reduce_dim=self._reduce_dim))
        partition_ids = prediction_ops.gradient_trees_partition_examples(
            local_ensemble_handle,
            self._dense_floats,
            self._sparse_float_indices,
            self._sparse_float_values,
            self._sparse_float_shapes,
            self._sparse_int_indices,
            self._sparse_int_values,
            self._sparse_int_shapes,
            use_locking=False)

    else:
      with ops.device(self._ensemble_handle.device):
        ensemble_stats = training_ops.tree_ensemble_stats(
            self._ensemble_handle, ensemble_stamp)
        apply_dropout, seed = _dropout_params(mode, ensemble_stats)
        # We don't need dropout info - we can always restore it based on the
        # seed.
        predictions, predictions_no_dropout, _ = (
            prediction_ops.gradient_trees_prediction(
                self._ensemble_handle,
                seed,
                self._dense_floats,
                self._sparse_float_indices,
                self._sparse_float_values,
                self._sparse_float_shapes,
                self._sparse_int_indices,
                self._sparse_int_values,
                self._sparse_int_shapes,
                learner_config=self._learner_config_serialized,
                apply_dropout=apply_dropout,
                apply_averaging=apply_averaging,
                use_locking=False,
                center_bias=self._center_bias,
                reduce_dim=self._reduce_dim))
        partition_ids = prediction_ops.gradient_trees_partition_examples(
            self._ensemble_handle,
            self._dense_floats,
            self._sparse_float_indices,
            self._sparse_float_values,
            self._sparse_float_shapes,
            self._sparse_int_indices,
            self._sparse_int_values,
            self._sparse_int_shapes,
            use_locking=False)

    return _make_predictions_dict(ensemble_stamp, predictions,
                                  predictions_no_dropout, partition_ids,
                                  ensemble_stats)
Example #7
0
    def testRestore(self):
        # Calling self.test_session() without a graph specified results in
        # TensorFlowTestCase caching the session and returning the same one
        # every time. In this test, we need to create two different sessions
        # which is why we also create a graph and pass it to self.test_session()
        # to ensure no caching occurs under the hood.
        save_path = os.path.join(self.get_temp_dir(), "restore-test")
        with ops.Graph().as_default() as graph:
            with self.test_session(graph) as sess:
                tree_ensemble_config = (
                    tree_config_pb2.DecisionTreeEnsembleConfig())

                tree = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_metadata.add().is_finalized = True
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)

                tree_ensemble_config2 = (
                    tree_config_pb2.DecisionTreeEnsembleConfig())
                tree2 = tree_ensemble_config2.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)

                tree_ensemble_config3 = (
                    tree_config_pb2.DecisionTreeEnsembleConfig())
                tree3 = tree_ensemble_config3.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)

                # Prepare learner config.
                learner_config = learner_pb2.LearnerConfig()
                learner_config.num_classes = 2

                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=3,
                    tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                    name="restore_tree")
                feature_usage_counts = variables.Variable(
                    initial_value=array_ops.zeros([1], dtypes.int64),
                    name="feature_usage_counts",
                    trainable=False)
                feature_gains = variables.Variable(
                    initial_value=array_ops.zeros([1], dtypes.float32),
                    name="feature_gains",
                    trainable=False)

                resources.initialize_resources(
                    resources.shared_resources()).run()
                variables.initialize_all_variables().run()
                my_saver = saver.Saver()

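                # Add the second tree to the ensemble, then predict under a
                # control dependency so the trees are added first.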
                with ops.control_dependencies([
                        ensemble_optimizer_ops.add_trees_to_ensemble(
                            tree_ensemble_handle,
                            tree_ensemble_config2.SerializeToString(),
                            feature_usage_counts, [0],
                            feature_gains, [0], [[]],
                            learning_rate=1)
                ]):
                    result, _, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose([[-1.1], [-1.1]], result.eval())
                # Save before adding other trees.
                val = my_saver.save(sess, save_path)
                self.assertEqual(save_path, val)

                # Add more trees after saving.
                with ops.control_dependencies([
                        ensemble_optimizer_ops.add_trees_to_ensemble(
                            tree_ensemble_handle,
                            tree_ensemble_config3.SerializeToString(),
                            feature_usage_counts, [0],
                            feature_gains, [0], [[]],
                            learning_rate=1)
                ]):
                    result, _, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

        # Start a second session.  In that session the parameter nodes
        # have not been initialized either.
        with ops.Graph().as_default() as graph:
            with self.test_session(graph) as sess:
                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=0,
                    tree_ensemble_config="",
                    name="restore_tree")
                my_saver = saver.Saver()
                my_saver.restore(sess, save_path)
                result, _, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
                # Make sure we only have the first and second tree.
                # The third tree was added after the save.
                self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
Example #8
0
    def testSerialization(self):
        with ops.Graph().as_default() as graph:
            with self.test_session(graph):
                tree_ensemble_config = (
                    tree_config_pb2.DecisionTreeEnsembleConfig())
                # Bias tree only for second class.
                tree1 = tree_ensemble_config.trees.add()
                _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)

                tree_ensemble_config.tree_weights.append(1.0)

                # Depth 2 tree.
                tree2 = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
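                # Nodes get ids in insertion order: 0 and 1 are the two splits
                # below, and leaves 2, 3 and 4 carry the payloads.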
                _set_float_split(
                    tree2.nodes.add().sparse_float_binary_split_default_right
                    .split, 1, 4.0, 1, 2)
                _set_float_split(tree2.nodes.add().dense_float_binary_split, 0,
                                 9.0, 3, 4)
                _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
                _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
                _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=7,
                    tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                    name="saver_tree")
                stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
                    tree_ensemble_handle)
                resources.initialize_resources(
                    resources.shared_resources()).run()
                self.assertEqual(stamp_token.eval(), 7)
                serialized_config = serialized_config.eval()

        with ops.Graph().as_default() as graph:
            with self.test_session(graph):
                tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
                    stamp_token=9,
                    tree_ensemble_config=serialized_config,
                    name="saver_tree2")
                resources.initialize_resources(
                    resources.shared_resources()).run()

                # Prepare learner config.
                learner_config = learner_pb2.LearnerConfig()
                learner_config.num_classes = 3

                result, _, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle2,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)

                # Re-serialize tree.
                stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
                    tree_ensemble_handle2)

                # The first example gets the class-1 bias of -0.2 from the
                # first tree plus the leaf-2 payload of 0.5 (its sparse feature
                # is missing), hence [0.5, -0.2]. The second example gets the
                # same -0.2 bias plus the leaf-3 payload of 1.2 for class 1,
                # hence [0.0, 1.0].
                self.assertEqual(stamp_token2.eval(), 9)

                # Class 2 does not have scores in any leaf => it gets score 0.
                self.assertEqual(serialized_config2.eval(), serialized_config)
                self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])
Example #9
0
  def testRestore(self):
    # Calling self.cached_session() without a graph specified results in
    # TensorFlowTestCase caching the session and returning the same one
    # every time. In this test, we need to create two different sessions
    # which is why we also create a graph and pass it to self.session()
    # to ensure no caching occurs under the hood.
    save_path = os.path.join(self.get_temp_dir(), "restore-test")
    with ops.Graph().as_default() as graph:
      with self.session(graph) as sess:
        # Prepare learner config.
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2

        # Add the first tree and save.
        tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
        tree = tree_ensemble_config.trees.add()
        tree_ensemble_config.tree_metadata.add().is_finalized = True
        tree_ensemble_config.tree_weights.append(1.0)
        _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)
        tree_ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=3,
            tree_ensemble_config=tree_ensemble_config.SerializeToString(),
            name="restore_tree")
        resources.initialize_resources(resources.shared_resources()).run()
        variables.global_variables_initializer().run()
        my_saver = saver.Saver()

        # Add the second tree and replace the ensemble of the handle.
        tree2 = tree_ensemble_config.trees.add()
        tree_ensemble_config.tree_weights.append(1.0)
        _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)
        # Predict to confirm.
        with ops.control_dependencies([
            model_ops.tree_ensemble_deserialize(
                tree_ensemble_handle,
                stamp_token=3,
                tree_ensemble_config=tree_ensemble_config.SerializeToString())
        ]):
          result, _ = prediction_ops.gradient_trees_prediction(
              tree_ensemble_handle,
              self._seed, [self._dense_float_tensor], [
                  self._sparse_float_indices1, self._sparse_float_indices2
              ], [self._sparse_float_values1, self._sparse_float_values2],
              [self._sparse_float_shape1,
               self._sparse_float_shape2], [self._sparse_int_indices1],
              [self._sparse_int_values1], [self._sparse_int_shape1],
              learner_config=learner_config.SerializeToString(),
              apply_dropout=False,
              apply_averaging=False,
              center_bias=False,
              reduce_dim=True)
        self.assertAllClose([[-1.1], [-1.1]], result.eval())
        # Save before adding other trees.
        val = my_saver.save(sess, save_path)
        self.assertEqual(save_path, val)

        # Add more trees after saving.
        tree3 = tree_ensemble_config.trees.add()
        tree_ensemble_config.tree_weights.append(1.0)
        _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)
        # Predict to confirm.
        with ops.control_dependencies([
            model_ops.tree_ensemble_deserialize(
                tree_ensemble_handle,
                stamp_token=3,
                tree_ensemble_config=tree_ensemble_config.SerializeToString())
        ]):
          result, _ = prediction_ops.gradient_trees_prediction(
              tree_ensemble_handle,
              self._seed, [self._dense_float_tensor], [
                  self._sparse_float_indices1, self._sparse_float_indices2
              ], [self._sparse_float_values1, self._sparse_float_values2],
              [self._sparse_float_shape1,
               self._sparse_float_shape2], [self._sparse_int_indices1],
              [self._sparse_int_values1], [self._sparse_int_shape1],
              learner_config=learner_config.SerializeToString(),
              apply_dropout=False,
              apply_averaging=False,
              center_bias=False,
              reduce_dim=True)
        self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

    # Start a second session.  In that session the parameter nodes
    # have not been initialized either.
    with ops.Graph().as_default() as graph:
      with self.session(graph) as sess:
        tree_ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=0, tree_ensemble_config="", name="restore_tree")
        my_saver = saver.Saver()
        my_saver.restore(sess, save_path)
        result, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle,
            self._seed, [self._dense_float_tensor], [
                self._sparse_float_indices1, self._sparse_float_indices2
            ], [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1,
             self._sparse_float_shape2], [self._sparse_int_indices1],
            [self._sparse_int_values1], [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
        # Make sure we only have the first and second tree.
        # The third tree was added after the save.
        self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
Example #10
0
  def testSerialization(self):
    with ops.Graph().as_default() as graph:
      with self.session(graph):
        tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
        # Bias tree only for second class.
        tree1 = tree_ensemble_config.trees.add()
        _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)

        tree_ensemble_config.tree_weights.append(1.0)

        # Depth 2 tree.
        tree2 = tree_ensemble_config.trees.add()
        tree_ensemble_config.tree_weights.append(1.0)
        _set_float_split(tree2.nodes.add()
                         .sparse_float_binary_split_default_right.split, 1, 4.0,
                         1, 2)
        _set_float_split(tree2.nodes.add().dense_float_binary_split, 0, 9.0, 3,
                         4)
        _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
        _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
        _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

        tree_ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=7,
            tree_ensemble_config=tree_ensemble_config.SerializeToString(),
            name="saver_tree")
        stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
            tree_ensemble_handle)
        resources.initialize_resources(resources.shared_resources()).run()
        self.assertEqual(stamp_token.eval(), 7)
        serialized_config = serialized_config.eval()

    with ops.Graph().as_default() as graph:
      with self.session(graph):
        tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
            stamp_token=9,
            tree_ensemble_config=serialized_config,
            name="saver_tree2")
        resources.initialize_resources(resources.shared_resources()).run()

        # Prepare learner config.
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 3

        result, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle2,
            self._seed, [self._dense_float_tensor], [
                self._sparse_float_indices1, self._sparse_float_indices2
            ], [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1,
             self._sparse_float_shape2], [self._sparse_int_indices1],
            [self._sparse_int_values1], [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)

        # Re-serialize tree.
        stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
            tree_ensemble_handle2)

        # The first example gets the class-1 bias of -0.2 from the first tree
        # plus the leaf-2 payload of 0.5 (its sparse feature is missing),
        # hence [0.5, -0.2]. The second example gets the same -0.2 bias plus
        # the leaf-3 payload of 1.2 for class 1, hence [0.0, 1.0].
        self.assertEqual(stamp_token2.eval(), 9)

        # Class 2 does not have scores in any leaf => it gets score 0.
        self.assertEqual(serialized_config2.eval(), serialized_config)
        self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])
Example #11
0
  def testDropoutSeed(self):
    with self.test_session():
      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
      # Add 999 trees with increasing weights.
      for i in range(0, 999):
        tree = tree_ensemble_config.trees.add()
        tree_ensemble_config.tree_metadata.add().is_finalized = True
        _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
        tree_ensemble_config.tree_weights.append(i + 1)

      # Prepare learner/dropout config.
      learner_config = learner_pb2.LearnerConfig()
      learner_config.learning_rate_tuner.dropout.dropout_probability = 0.5
      learner_config.learning_rate_tuner.dropout.learning_rate = 1.0
      learner_config.num_classes = 2

      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=0,
          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
          name="empty")
      resources.initialize_resources(resources.shared_resources()).run()

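      # Each call returns (predictions, dropout_info); dropout_info records
      # which trees were dropped and their weights.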
      _, dropout_info_1 = self._get_predictions(
          tree_ensemble_handle,
          learner_config=learner_config.SerializeToString(),
          apply_dropout=True,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)

      _, dropout_info_2 = self._get_predictions(
          tree_ensemble_handle,
          learner_config=learner_config.SerializeToString(),
          apply_dropout=True,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)

      # Different seed.
      _, dropout_info_3 = prediction_ops.gradient_trees_prediction(
          tree_ensemble_handle,
          112314, [self._dense_float_tensor],
          [self._sparse_float_indices1, self._sparse_float_indices2],
          [self._sparse_float_values1, self._sparse_float_values2],
          [self._sparse_float_shape1, self._sparse_float_shape2],
          [self._sparse_int_indices1], [self._sparse_int_values1],
          [self._sparse_int_shape1],
          learner_config=learner_config.SerializeToString(),
          apply_dropout=True,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)

      # First seed with centering bias.
      _, dropout_info_4 = self._get_predictions(
          tree_ensemble_handle,
          learner_config=learner_config.SerializeToString(),
          apply_dropout=True,
          apply_averaging=False,
          center_bias=True,
          reduce_dim=True)

      # The same seed returns the same results.
      self.assertAllEqual(dropout_info_1.eval(), dropout_info_2.eval())
      # Different seeds give different results.
      self.assertNotEqual(dropout_info_3.eval().shape,
                          dropout_info_2.eval().shape)
      # With centering bias, the same seed does not give the same result.
      self.assertNotEqual(dropout_info_4.eval(), dropout_info_1.eval())
      # With centering bias, one fewer tree is dropped (the bias tree is
      # never dropped).
      self.assertEqual(
          len(dropout_info_4.eval()[0]) + 1, len(dropout_info_1.eval()[0]))