def testCreate(self):
    """A freshly created single-leaf ensemble predicts that leaf's value."""
    with self.test_session():
        # One tree, one leaf contributing -0.4 to class 0, weight 1.
        tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
        tree = tree_ensemble_config.trees.add()
        _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
        tree_ensemble_config.tree_weights.append(1.0)

        # Prepare learner config.
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2

        tree_ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=3,
            tree_ensemble_config=tree_ensemble_config.SerializeToString(),
            name="create_tree")
        resources.initialize_resources(resources.shared_resources()).run()

        result, _, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle,
            self._seed, [self._dense_float_tensor],
            [self._sparse_float_indices1, self._sparse_float_indices2],
            [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1, self._sparse_float_shape2],
            [self._sparse_int_indices1], [self._sparse_int_values1],
            [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
        # Both examples land in the single leaf.
        self.assertAllClose(result.eval(), [[-0.4], [-0.4]])

        # The variable keeps the stamp token it was created with.
        stamp_token = model_ops.tree_ensemble_stamp_token(tree_ensemble_handle)
        self.assertEqual(stamp_token.eval(), 3)
def testCreate(self):
    """Creates a one-leaf ensemble and verifies its prediction and stamp."""
    with self.cached_session():
        # Single tree whose only node is a leaf with value -0.4 for class 0.
        tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
        tree = tree_ensemble_config.trees.add()
        _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
        tree_ensemble_config.tree_weights.append(1.0)

        # Prepare learner config.
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2

        tree_ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=3,
            tree_ensemble_config=tree_ensemble_config.SerializeToString(),
            name="create_tree")
        resources.initialize_resources(resources.shared_resources()).run()

        result, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle,
            self._seed, [self._dense_float_tensor],
            [self._sparse_float_indices1, self._sparse_float_indices2],
            [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1, self._sparse_float_shape2],
            [self._sparse_int_indices1], [self._sparse_int_values1],
            [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
        # Every example reaches the same leaf.
        self.assertAllClose(result.eval(), [[-0.4], [-0.4]])

        # Creation stamp is preserved on the resource.
        stamp_token = model_ops.tree_ensemble_stamp_token(tree_ensemble_handle)
        self.assertEqual(stamp_token.eval(), 3)
def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode):
    """Runs prediction and returns a dictionary of the prediction results.

    Args:
      ensemble_handle: ensemble resource handle.
      ensemble_stamp: stamp of ensemble resource.
      mode: learn.ModeKeys.TRAIN or EVAL or INFER.

    Returns:
      A dictionary of prediction results - ENSEMBLE_STAMP, PREDICTION,
      PARTITION_IDS, NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPTED.
    """
    ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle,
                                                      ensemble_stamp)
    num_handlers = (
        len(self._dense_floats) + len(self._sparse_float_shapes) +
        len(self._sparse_int_shapes))
    # Used during feature selection.
    used_handlers = model_ops.tree_ensemble_used_handlers(
        ensemble_handle, ensemble_stamp, num_all_handlers=num_handlers)

    # We don't need dropout info - we can always restore it based on the seed.
    apply_dropout, seed = _dropout_params(mode, ensemble_stats)

    # Make sure ensemble stats run. This will check that the ensemble has the
    # right stamp.
    with ops.control_dependencies(ensemble_stats):
        predictions, _ = prediction_ops.gradient_trees_prediction(
            ensemble_handle,
            seed,
            self._dense_floats,
            self._sparse_float_indices,
            self._sparse_float_values,
            self._sparse_float_shapes,
            self._sparse_int_indices,
            self._sparse_int_values,
            self._sparse_int_shapes,
            learner_config=self._learner_config_serialized,
            apply_dropout=apply_dropout,
            # Averaging is applied only outside of training.
            apply_averaging=mode != learn.ModeKeys.TRAIN,
            use_locking=True,
            center_bias=self._center_bias,
            reduce_dim=self._reduce_dim)
        partition_ids = prediction_ops.gradient_trees_partition_examples(
            ensemble_handle,
            self._dense_floats,
            self._sparse_float_indices,
            self._sparse_float_values,
            self._sparse_float_shapes,
            self._sparse_int_indices,
            self._sparse_int_values,
            self._sparse_int_shapes,
            use_locking=True)

    return _make_predictions_dict(ensemble_stamp, predictions, partition_ids,
                                  ensemble_stats, used_handlers)
def _predict_and_return_dict(self, ensemble_handle, ensemble_stamp, mode):
    """Runs prediction and returns a dictionary of the prediction results.

    Args:
      ensemble_handle: ensemble resource handle.
      ensemble_stamp: stamp of ensemble resource.
      mode: learn.ModeKeys.TRAIN or EVAL or INFER.

    Returns:
      A dictionary of prediction results - ENSEMBLE_STAMP, PREDICTION,
      PARTITION_IDS, NUM_LAYER_ATTEMPTED, NUM_TREES_ATTEMPTED.
    """
    ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle,
                                                      ensemble_stamp)
    # Total number of feature-column handlers across all feature kinds.
    num_handlers = (
        len(self._dense_floats) + len(self._sparse_float_shapes) +
        len(self._sparse_int_shapes))
    # Used during feature selection.
    used_handlers = model_ops.tree_ensemble_used_handlers(
        ensemble_handle, ensemble_stamp, num_all_handlers=num_handlers)

    # We don't need dropout info - we can always restore it based on the seed.
    apply_dropout, seed = _dropout_params(mode, ensemble_stats)

    # Make sure ensemble stats run. This will check that the ensemble has the
    # right stamp.
    with ops.control_dependencies(ensemble_stats):
        predictions, _ = prediction_ops.gradient_trees_prediction(
            ensemble_handle,
            seed,
            self._dense_floats,
            self._sparse_float_indices,
            self._sparse_float_values,
            self._sparse_float_shapes,
            self._sparse_int_indices,
            self._sparse_int_values,
            self._sparse_int_shapes,
            learner_config=self._learner_config_serialized,
            apply_dropout=apply_dropout,
            apply_averaging=mode != learn.ModeKeys.TRAIN,
            use_locking=True,
            center_bias=self._center_bias,
            reduce_dim=self._reduce_dim)
        partition_ids = prediction_ops.gradient_trees_partition_examples(
            ensemble_handle,
            self._dense_floats,
            self._sparse_float_indices,
            self._sparse_float_values,
            self._sparse_float_shapes,
            self._sparse_int_indices,
            self._sparse_int_values,
            self._sparse_int_shapes,
            use_locking=True)

    return _make_predictions_dict(ensemble_stamp, predictions, partition_ids,
                                  ensemble_stats, used_handlers)
def _get_predictions(self,
                     tree_ensemble_handle,
                     learner_config,
                     apply_dropout=False,
                     apply_averaging=False,
                     center_bias=False,
                     reduce_dim=False):
    """Convenience wrapper: runs gradient_trees_prediction on fixture tensors.

    Forwards the flag arguments verbatim and always uses the fixture's seed
    and dense/sparse feature tensors.
    """
    return prediction_ops.gradient_trees_prediction(
        tree_ensemble_handle,
        self._seed, [self._dense_float_tensor],
        [self._sparse_float_indices1, self._sparse_float_indices2],
        [self._sparse_float_values1, self._sparse_float_values2],
        [self._sparse_float_shape1, self._sparse_float_shape2],
        [self._sparse_int_indices1], [self._sparse_int_values1],
        [self._sparse_int_shape1],
        learner_config=learner_config,
        apply_dropout=apply_dropout,
        apply_averaging=apply_averaging,
        center_bias=center_bias,
        reduce_dim=reduce_dim)
def predict(self, mode):
    """Returns predictions given the features and mode.

    Args:
      mode: Mode the graph is running in (train|predict|eval).

    Returns:
      A dict of predictions tensors.

    Raises:
      ValueError: if features is not valid.
    """
    apply_averaging = mode != learn.ModeKeys.TRAIN

    # Use the current ensemble to predict on the current batch of input.
    # For faster prediction we check if the inputs are on the same device
    # as the model. If not, we create a copy of the model on the worker.
    input_deps = (
        self._dense_floats + self._sparse_float_indices +
        self._sparse_int_indices)
    if not input_deps:
        raise ValueError("No input tensors for prediction.")
    if any(i.device != input_deps[0].device for i in input_deps):
        raise ValueError("All input tensors should be on the same device.")

    # Get most current model stamp.
    ensemble_stamp = model_ops.tree_ensemble_stamp_token(self._ensemble_handle)

    # Determine if ensemble is colocated with the inputs.
    if self._ensemble_handle.device != input_deps[0].device:
        # Create a local ensemble and get its local stamp.
        with ops.name_scope("local_ensemble", "TreeEnsembleVariable") as name:
            local_ensemble_handle = (
                gen_model_ops.decision_tree_ensemble_resource_handle_op(
                    name=name))
            create_op = gen_model_ops.create_tree_ensemble_variable(
                local_ensemble_handle, stamp_token=-1, tree_ensemble_config="")
            with ops.control_dependencies([create_op]):
                local_stamp = model_ops.tree_ensemble_stamp_token(
                    local_ensemble_handle)

        # Determine whether the local ensemble is stale and update it if needed.
        def _refresh_local_ensemble_fn():
            # Serialize the model from parameter server after reading all
            # inputs.
            with ops.control_dependencies(input_deps):
                (ensemble_stamp, serialized_model) = (
                    model_ops.tree_ensemble_serialize(self._ensemble_handle))

            # Update local ensemble with the serialized model from parameter
            # server.
            with ops.control_dependencies([create_op]):
                return model_ops.tree_ensemble_deserialize(
                    local_ensemble_handle,
                    stamp_token=ensemble_stamp,
                    tree_ensemble_config=serialized_model), ensemble_stamp

        refresh_local_ensemble, ensemble_stamp = control_flow_ops.cond(
            math_ops.not_equal(ensemble_stamp, local_stamp),
            _refresh_local_ensemble_fn,
            lambda: (control_flow_ops.no_op(), ensemble_stamp))

        # Once updated, use the local model for prediction.
        with ops.control_dependencies([refresh_local_ensemble]):
            ensemble_stats = training_ops.tree_ensemble_stats(
                local_ensemble_handle, ensemble_stamp)
            apply_dropout, seed = _dropout_params(mode, ensemble_stats)
            # We don't need dropout info - we can always restore it based on
            # the seed.
            predictions, predictions_no_dropout, _ = (
                prediction_ops.gradient_trees_prediction(
                    local_ensemble_handle,
                    seed,
                    self._dense_floats,
                    self._sparse_float_indices,
                    self._sparse_float_values,
                    self._sparse_float_shapes,
                    self._sparse_int_indices,
                    self._sparse_int_values,
                    self._sparse_int_shapes,
                    learner_config=self._learner_config_serialized,
                    apply_dropout=apply_dropout,
                    apply_averaging=apply_averaging,
                    use_locking=False,
                    center_bias=self._center_bias,
                    reduce_dim=self._reduce_dim))
            partition_ids = prediction_ops.gradient_trees_partition_examples(
                local_ensemble_handle,
                self._dense_floats,
                self._sparse_float_indices,
                self._sparse_float_values,
                self._sparse_float_shapes,
                self._sparse_int_indices,
                self._sparse_int_values,
                self._sparse_int_shapes,
                use_locking=False)
    else:
        # Ensemble already lives with the inputs; predict against it directly.
        with ops.device(self._ensemble_handle.device):
            ensemble_stats = training_ops.tree_ensemble_stats(
                self._ensemble_handle, ensemble_stamp)
            apply_dropout, seed = _dropout_params(mode, ensemble_stats)
            # We don't need dropout info - we can always restore it based on
            # the seed.
            predictions, predictions_no_dropout, _ = (
                prediction_ops.gradient_trees_prediction(
                    self._ensemble_handle,
                    seed,
                    self._dense_floats,
                    self._sparse_float_indices,
                    self._sparse_float_values,
                    self._sparse_float_shapes,
                    self._sparse_int_indices,
                    self._sparse_int_values,
                    self._sparse_int_shapes,
                    learner_config=self._learner_config_serialized,
                    apply_dropout=apply_dropout,
                    apply_averaging=apply_averaging,
                    use_locking=False,
                    center_bias=self._center_bias,
                    reduce_dim=self._reduce_dim))
            partition_ids = prediction_ops.gradient_trees_partition_examples(
                self._ensemble_handle,
                self._dense_floats,
                self._sparse_float_indices,
                self._sparse_float_values,
                self._sparse_float_shapes,
                self._sparse_int_indices,
                self._sparse_int_values,
                self._sparse_int_shapes,
                use_locking=False)

    return _make_predictions_dict(ensemble_stamp, predictions,
                                  predictions_no_dropout, partition_ids,
                                  ensemble_stats)
def testRestore(self):
    """Saving then restoring drops trees added after the checkpoint."""
    # Calling self.test_session() without a graph specified results in
    # TensorFlowTestCase caching the session and returning the same one
    # every time. In this test, we need to create two different sessions
    # which is why we also create a graph and pass it to self.test_session()
    # to ensure no caching occurs under the hood.
    save_path = os.path.join(self.get_temp_dir(), "restore-test")
    with ops.Graph().as_default() as graph:
        with self.test_session(graph) as sess:
            # First (finalized) tree: single leaf of -0.1.
            tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            tree = tree_ensemble_config.trees.add()
            tree_ensemble_config.tree_metadata.add().is_finalized = True
            tree_ensemble_config.tree_weights.append(1.0)
            _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)

            # Second tree (merged in before saving): single leaf of -1.0.
            tree_ensemble_config2 = tree_config_pb2.DecisionTreeEnsembleConfig()
            tree2 = tree_ensemble_config2.trees.add()
            # NOTE(review): weight appended to tree_ensemble_config rather than
            # tree_ensemble_config2 - preserved as-is; add_trees_to_ensemble is
            # called with learning_rate=1 below, verify whether this is intended.
            tree_ensemble_config.tree_weights.append(1.0)
            _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)

            # Third tree (merged in only after saving): single leaf of -10.0.
            tree_ensemble_config3 = tree_config_pb2.DecisionTreeEnsembleConfig()
            tree3 = tree_ensemble_config3.trees.add()
            # NOTE(review): same weight-target question as above.
            tree_ensemble_config.tree_weights.append(1.0)
            _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)

            # Prepare learner config.
            learner_config = learner_pb2.LearnerConfig()
            learner_config.num_classes = 2

            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=3,
                tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                name="restore_tree")
            feature_usage_counts = variables.Variable(
                initial_value=array_ops.zeros([1], dtypes.int64),
                name="feature_usage_counts",
                trainable=False)
            feature_gains = variables.Variable(
                initial_value=array_ops.zeros([1], dtypes.float32),
                name="feature_gains",
                trainable=False)

            resources.initialize_resources(resources.shared_resources()).run()
            variables.initialize_all_variables().run()
            my_saver = saver.Saver()

            # Merge the second tree in, then predict: -0.1 + -1.0 = -1.1.
            with ops.control_dependencies([
                ensemble_optimizer_ops.add_trees_to_ensemble(
                    tree_ensemble_handle,
                    tree_ensemble_config2.SerializeToString(),
                    feature_usage_counts, [0],
                    feature_gains, [0], [[]],
                    learning_rate=1)
            ]):
                result, _, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
            self.assertAllClose([[-1.1], [-1.1]], result.eval())
            # Save before adding other trees.
            val = my_saver.save(sess, save_path)
            self.assertEqual(save_path, val)

            # Add more trees after saving: -1.1 + -10.0 = -11.1.
            with ops.control_dependencies([
                ensemble_optimizer_ops.add_trees_to_ensemble(
                    tree_ensemble_handle,
                    tree_ensemble_config3.SerializeToString(),
                    feature_usage_counts, [0],
                    feature_gains, [0], [[]],
                    learning_rate=1)
            ]):
                result, _, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
            self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

    # Start a second session. In that session the parameter nodes
    # have not been initialized either.
    with ops.Graph().as_default() as graph:
        with self.test_session(graph) as sess:
            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=0, tree_ensemble_config="", name="restore_tree")
            my_saver = saver.Saver()
            my_saver.restore(sess, save_path)

            result, _, _ = prediction_ops.gradient_trees_prediction(
                tree_ensemble_handle,
                self._seed, [self._dense_float_tensor],
                [self._sparse_float_indices1, self._sparse_float_indices2],
                [self._sparse_float_values1, self._sparse_float_values2],
                [self._sparse_float_shape1, self._sparse_float_shape2],
                [self._sparse_int_indices1], [self._sparse_int_values1],
                [self._sparse_int_shape1],
                learner_config=learner_config.SerializeToString(),
                apply_dropout=False,
                apply_averaging=False,
                center_bias=False,
                reduce_dim=True)
            # Make sure we only have the first and second tree.
            # The third tree was added after the save.
            self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
def testSerialization(self):
    """Serializing then deserializing yields an identical ensemble."""
    with ops.Graph().as_default() as graph:
        with self.test_session(graph):
            tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            # Bias tree only for second class.
            tree1 = tree_ensemble_config.trees.add()
            _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)
            tree_ensemble_config.tree_weights.append(1.0)

            # Depth 2 tree.
            tree2 = tree_ensemble_config.trees.add()
            tree_ensemble_config.tree_weights.append(1.0)
            _set_float_split(
                tree2.nodes.add().sparse_float_binary_split_default_right.split,
                1, 4.0, 1, 2)
            _set_float_split(tree2.nodes.add().dense_float_binary_split, 0, 9.0,
                             3, 4)
            _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
            _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
            _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=7,
                tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                name="saver_tree")
            stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
                tree_ensemble_handle)
            resources.initialize_resources(resources.shared_resources()).run()
            self.assertEqual(stamp_token.eval(), 7)
            serialized_config = serialized_config.eval()

    # New graph/session: rebuild the ensemble from the serialized bytes.
    with ops.Graph().as_default() as graph:
        with self.test_session(graph):
            tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
                stamp_token=9,
                tree_ensemble_config=serialized_config,
                name="saver_tree2")
            resources.initialize_resources(resources.shared_resources()).run()

            # Prepare learner config.
            learner_config = learner_pb2.LearnerConfig()
            learner_config.num_classes = 3
            result, _, _ = prediction_ops.gradient_trees_prediction(
                tree_ensemble_handle2,
                self._seed, [self._dense_float_tensor],
                [self._sparse_float_indices1, self._sparse_float_indices2],
                [self._sparse_float_values1, self._sparse_float_values2],
                [self._sparse_float_shape1, self._sparse_float_shape2],
                [self._sparse_int_indices1], [self._sparse_int_values1],
                [self._sparse_int_shape1],
                learner_config=learner_config.SerializeToString(),
                apply_dropout=False,
                apply_averaging=False,
                center_bias=False,
                reduce_dim=True)

            # Re-serialize tree.
            stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
                tree_ensemble_handle2)

            # The first example will get bias class 1 -0.2 from first tree and
            # leaf 2 payload (sparse feature missing) of 0.5 hence [0.5, -0.2],
            # the second example will get the same bias class 1 -0.2 and leaf 3
            # payload of class 1 1.2 hence [0.0, 1.0].
            self.assertEqual(stamp_token2.eval(), 9)
            # Class 2 does have scores in the leaf => it gets score 0.
            self.assertEqual(serialized_config2.eval(), serialized_config)
            self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])
def testRestore(self):
    """Restoring a checkpoint discards trees deserialized after the save."""
    # Calling self.cached_session() without a graph specified results in
    # TensorFlowTestCase caching the session and returning the same one
    # every time. In this test, we need to create two different sessions
    # which is why we also create a graph and pass it to self.cached_session()
    # to ensure no caching occurs under the hood.
    save_path = os.path.join(self.get_temp_dir(), "restore-test")
    with ops.Graph().as_default() as graph:
        with self.session(graph) as sess:
            # Prepare learner config.
            learner_config = learner_pb2.LearnerConfig()
            learner_config.num_classes = 2

            # Add the first tree and save.
            tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            tree = tree_ensemble_config.trees.add()
            tree_ensemble_config.tree_metadata.add().is_finalized = True
            tree_ensemble_config.tree_weights.append(1.0)
            _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)
            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=3,
                tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                name="restore_tree")
            resources.initialize_resources(resources.shared_resources()).run()
            variables.global_variables_initializer().run()
            my_saver = saver.Saver()

            # Add the second tree and replace the ensemble of the handle.
            tree2 = tree_ensemble_config.trees.add()
            tree_ensemble_config.tree_weights.append(1.0)
            _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)
            # Predict to confirm: -0.1 + -1.0 = -1.1.
            with ops.control_dependencies([
                model_ops.tree_ensemble_deserialize(
                    tree_ensemble_handle,
                    stamp_token=3,
                    tree_ensemble_config=tree_ensemble_config
                    .SerializeToString())
            ]):
                result, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
            self.assertAllClose([[-1.1], [-1.1]], result.eval())
            # Save before adding other trees.
            val = my_saver.save(sess, save_path)
            self.assertEqual(save_path, val)

            # Add more trees after saving.
            tree3 = tree_ensemble_config.trees.add()
            tree_ensemble_config.tree_weights.append(1.0)
            _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)
            # Predict to confirm: -1.1 + -10.0 = -11.1.
            with ops.control_dependencies([
                model_ops.tree_ensemble_deserialize(
                    tree_ensemble_handle,
                    stamp_token=3,
                    tree_ensemble_config=tree_ensemble_config
                    .SerializeToString())
            ]):
                result, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
            self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

    # Start a second session. In that session the parameter nodes
    # have not been initialized either.
    with ops.Graph().as_default() as graph:
        with self.session(graph) as sess:
            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=0, tree_ensemble_config="", name="restore_tree")
            my_saver = saver.Saver()
            my_saver.restore(sess, save_path)

            result, _ = prediction_ops.gradient_trees_prediction(
                tree_ensemble_handle,
                self._seed, [self._dense_float_tensor],
                [self._sparse_float_indices1, self._sparse_float_indices2],
                [self._sparse_float_values1, self._sparse_float_values2],
                [self._sparse_float_shape1, self._sparse_float_shape2],
                [self._sparse_int_indices1], [self._sparse_int_values1],
                [self._sparse_int_shape1],
                learner_config=learner_config.SerializeToString(),
                apply_dropout=False,
                apply_averaging=False,
                center_bias=False,
                reduce_dim=True)
            # Make sure we only have the first and second tree.
            # The third tree was added after the save.
            self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
def testSerialization(self):
    """Round-trips an ensemble through serialize/deserialize across graphs."""
    with ops.Graph().as_default() as graph:
        with self.session(graph):
            tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            # Bias tree only for second class.
            tree1 = tree_ensemble_config.trees.add()
            _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)
            tree_ensemble_config.tree_weights.append(1.0)

            # Depth 2 tree.
            tree2 = tree_ensemble_config.trees.add()
            tree_ensemble_config.tree_weights.append(1.0)
            _set_float_split(
                tree2.nodes.add().sparse_float_binary_split_default_right.split,
                1, 4.0, 1, 2)
            _set_float_split(tree2.nodes.add().dense_float_binary_split, 0, 9.0,
                             3, 4)
            _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
            _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
            _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=7,
                tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                name="saver_tree")
            stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
                tree_ensemble_handle)
            resources.initialize_resources(resources.shared_resources()).run()
            self.assertEqual(stamp_token.eval(), 7)
            serialized_config = serialized_config.eval()

    # Fresh graph/session: rebuild from the serialized bytes under a new stamp.
    with ops.Graph().as_default() as graph:
        with self.session(graph):
            tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
                stamp_token=9,
                tree_ensemble_config=serialized_config,
                name="saver_tree2")
            resources.initialize_resources(resources.shared_resources()).run()

            # Prepare learner config.
            learner_config = learner_pb2.LearnerConfig()
            learner_config.num_classes = 3
            result, _ = prediction_ops.gradient_trees_prediction(
                tree_ensemble_handle2,
                self._seed, [self._dense_float_tensor],
                [self._sparse_float_indices1, self._sparse_float_indices2],
                [self._sparse_float_values1, self._sparse_float_values2],
                [self._sparse_float_shape1, self._sparse_float_shape2],
                [self._sparse_int_indices1], [self._sparse_int_values1],
                [self._sparse_int_shape1],
                learner_config=learner_config.SerializeToString(),
                apply_dropout=False,
                apply_averaging=False,
                center_bias=False,
                reduce_dim=True)

            # Re-serialize tree.
            stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
                tree_ensemble_handle2)

            # The first example will get bias class 1 -0.2 from first tree and
            # leaf 2 payload (sparse feature missing) of 0.5 hence [0.5, -0.2],
            # the second example will get the same bias class 1 -0.2 and leaf 3
            # payload of class 1 1.2 hence [0.0, 1.0].
            self.assertEqual(stamp_token2.eval(), 9)
            # Class 2 does have scores in the leaf => it gets score 0.
            self.assertEqual(serialized_config2.eval(), serialized_config)
            self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])
def testDropoutSeed(self):
    """Dropout is deterministic for a fixed seed and varies across seeds."""
    with self.test_session():
        tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
        # Build 999 finalized single-leaf trees with weights 1..999.
        for i in range(0, 999):
            tree = tree_ensemble_config.trees.add()
            tree_ensemble_config.tree_metadata.add().is_finalized = True
            _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
            tree_ensemble_config.tree_weights.append(i + 1)

        # Prepare learner/dropout config.
        learner_config = learner_pb2.LearnerConfig()
        learner_config.learning_rate_tuner.dropout.dropout_probability = 0.5
        learner_config.learning_rate_tuner.dropout.learning_rate = 1.0
        learner_config.num_classes = 2

        tree_ensemble_handle = model_ops.tree_ensemble_variable(
            stamp_token=0,
            tree_ensemble_config=tree_ensemble_config.SerializeToString(),
            name="empty")
        resources.initialize_resources(resources.shared_resources()).run()

        # Two predictions with the fixture seed.
        _, dropout_info_1 = self._get_predictions(
            tree_ensemble_handle,
            learner_config=learner_config.SerializeToString(),
            apply_dropout=True,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
        _, dropout_info_2 = self._get_predictions(
            tree_ensemble_handle,
            learner_config=learner_config.SerializeToString(),
            apply_dropout=True,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)

        # Different seed.
        _, dropout_info_3 = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle, 112314, [self._dense_float_tensor],
            [self._sparse_float_indices1, self._sparse_float_indices2],
            [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1, self._sparse_float_shape2],
            [self._sparse_int_indices1], [self._sparse_int_values1],
            [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=True,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)

        # First seed with centering bias.
        _, dropout_info_4 = self._get_predictions(
            tree_ensemble_handle,
            learner_config=learner_config.SerializeToString(),
            apply_dropout=True,
            apply_averaging=False,
            center_bias=True,
            reduce_dim=True)

        # The same seed returns the same results.
        self.assertAllEqual(dropout_info_1.eval(), dropout_info_2.eval())
        # Different seeds give diff results.
        self.assertNotEqual(dropout_info_3.eval().shape,
                            dropout_info_2.eval().shape)
        # With centering bias and the same seed does not give the same result.
        self.assertNotEqual(dropout_info_4.eval(), dropout_info_1.eval())
        # With centering bias has 1 less tree dropped (bias tree is not
        # dropped).
        self.assertEqual(
            len(dropout_info_4.eval()[0]) + 1, len(dropout_info_1.eval()[0]))