def model_builder(features, labels, mode, params, config, output_type=ModelBuilderOutputType.MODEL_FN_OPS): """Multi-machine batch gradient descent tree model. Args: features: `Tensor` or `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * learner_config: A config for the learner. * feature_columns: An iterable containing all the feature columns used by the model. * examples_per_layer: Number of examples to accumulate before growing a layer. It can also be a function that computes the number of examples based on the depth of the layer that's being built. * weight_column_name: The name of weight column. * center_bias: Whether a separate tree should be created for first fitting the bias. config: `RunConfig` of the estimator. output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec (new interface). Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ head = params["head"] learner_config = params["learner_config"] examples_per_layer = params["examples_per_layer"] feature_columns = params["feature_columns"] weight_column_name = params["weight_column_name"] num_trees = params["num_trees"] use_core_libs = params["use_core_libs"] logits_modifier_function = params["logits_modifier_function"] output_leaf_index = params["output_leaf_index"] if features is None: raise ValueError("At least one feature must be specified.") if config is None: raise ValueError("Missing estimator RunConfig.") center_bias = params["center_bias"] if isinstance(features, ops.Tensor): features = {features.name: features} # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. training_features = copy.copy(features) training_features.pop(weight_column_name, None) global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. name="ensemble_model") # Create GBDT model. gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=center_bias, examples_per_layer=examples_per_layer, learner_config=learner_config, feature_columns=feature_columns, logits_dimension=head.logits_dimension, features=training_features, use_core_columns=use_core_libs, output_leaf_index=output_leaf_index) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict["predictions"] if logits_modifier_function: logits = logits_modifier_function(logits, features, mode) def _train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op create_estimator_spec_op = getattr(head, "create_estimator_spec", None) training_hooks = [] if num_trees: if center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor( ) training_hooks.append( trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees)) if output_type == ModelBuilderOutputType.MODEL_FN_OPS: if use_core_libs and callable(create_estimator_spec_op): model_fn_ops = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) if output_leaf_index and gbdt_batch.LEAF_INDEX in predictions_dict: model_fn_ops.predictions[gbdt_batch.LEAF_INDEX] = predictions_dict[ gbdt_batch.LEAF_INDEX] model_fn_ops.training_hooks.extend(training_hooks) return model_fn_ops elif output_type == ModelBuilderOutputType.ESTIMATOR_SPEC: assert callable(create_estimator_spec_op) estimator_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) estimator_spec = estimator_spec._replace( training_hooks=training_hooks + list(estimator_spec.training_hooks)) return estimator_spec return model_fn_ops
def ranking_model_builder(features, labels, mode, params, config, output_type=ModelBuilderOutputType.MODEL_FN_OPS): """Multi-machine batch gradient descent tree model for ranking. Args: features: `Tensor` or `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * learner_config: A config for the learner. * feature_columns: An iterable containing all the feature columns used by the model. * examples_per_layer: Number of examples to accumulate before growing a layer. It can also be a function that computes the number of examples based on the depth of the layer that's being built. * weight_column_name: The name of weight column. * center_bias: Whether a separate tree should be created for first fitting the bias. * ranking_model_pair_keys (Optional): Keys to distinguish between features for left and right part of the training pairs for ranking. For example, for an Example with features "a.f1" and "b.f1", the keys would be ("a", "b"). config: `RunConfig` of the estimator. output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec (new interface). Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ head = params["head"] learner_config = params["learner_config"] examples_per_layer = params["examples_per_layer"] feature_columns = params["feature_columns"] weight_column_name = params["weight_column_name"] num_trees = params["num_trees"] use_core_libs = params["use_core_libs"] logits_modifier_function = params["logits_modifier_function"] output_leaf_index = params["output_leaf_index"] ranking_model_pair_keys = params["ranking_model_pair_keys"] if features is None: raise ValueError("At least one feature must be specified.") if config is None: raise ValueError("Missing estimator RunConfig.") center_bias = params["center_bias"] if isinstance(features, ops.Tensor): features = {features.name: features} # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. training_features = copy.copy(features) training_features.pop(weight_column_name, None) global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. name="ensemble_model") # Extract the features. if mode == learn.ModeKeys.TRAIN or mode == learn.ModeKeys.EVAL: # For ranking pairwise training, we extract two sets of features. if len(ranking_model_pair_keys) != 2: raise ValueError("You must provide keys for ranking.") left_pair_key = ranking_model_pair_keys[0] right_pair_key = ranking_model_pair_keys[1] if left_pair_key is None or right_pair_key is None: raise ValueError("Both pair keys should be provided for ranking.") features_1 = {} features_2 = {} for name in training_features: feature = training_features[name] new_name = name[2:] if name.startswith(left_pair_key + "."): features_1[new_name] = feature else: assert name.startswith(right_pair_key + ".") features_2[new_name] = feature main_features = features_1 supplementary_features = features_2 else: # For non-ranking or inference ranking, we have only 1 set of features. main_features = training_features # Create GBDT model. gbdt_model_main = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=center_bias, examples_per_layer=examples_per_layer, learner_config=learner_config, feature_columns=feature_columns, logits_dimension=head.logits_dimension, features=main_features, use_core_columns=use_core_libs, output_leaf_index=output_leaf_index) with ops.name_scope("gbdt", "gbdt_optimizer"): # Logits for inference. if mode == learn.ModeKeys.INFER: predictions_dict = gbdt_model_main.predict(mode) logits = predictions_dict[gbdt_batch.PREDICTIONS] if logits_modifier_function: logits = logits_modifier_function(logits, features, mode) else: gbdt_model_supplementary = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=center_bias, examples_per_layer=examples_per_layer, learner_config=learner_config, feature_columns=feature_columns, logits_dimension=head.logits_dimension, features=supplementary_features, use_core_columns=use_core_libs, output_leaf_index=output_leaf_index) # Logits for train and eval. if not supplementary_features: raise ValueError("Features for ranking must be specified.") predictions_dict_1 = gbdt_model_main.predict(mode) predictions_1 = predictions_dict_1[gbdt_batch.PREDICTIONS] predictions_dict_2 = gbdt_model_supplementary.predict(mode) predictions_2 = predictions_dict_2[gbdt_batch.PREDICTIONS] logits = predictions_1 - predictions_2 if logits_modifier_function: logits = logits_modifier_function(logits, features, mode) predictions_dict = predictions_dict_1 predictions_dict[gbdt_batch.PREDICTIONS] = logits def _train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model_main.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op create_estimator_spec_op = getattr(head, "create_estimator_spec", None) training_hooks = [] if num_trees: if center_bias: num_trees += 1 finalized_trees, attempted_trees = ( gbdt_model_main.get_number_of_trees_tensor()) training_hooks.append( trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees)) if output_type == ModelBuilderOutputType.MODEL_FN_OPS: if use_core_libs and callable(create_estimator_spec_op): model_fn_ops = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) if output_leaf_index and gbdt_batch.LEAF_INDEX in predictions_dict: model_fn_ops.predictions[gbdt_batch.LEAF_INDEX] = predictions_dict[ gbdt_batch.LEAF_INDEX] model_fn_ops.training_hooks.extend(training_hooks) return model_fn_ops elif output_type == ModelBuilderOutputType.ESTIMATOR_SPEC: assert callable(create_estimator_spec_op) estimator_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) estimator_spec = estimator_spec._replace( training_hooks=training_hooks + list(estimator_spec.training_hooks)) return estimator_spec return model_fn_ops
def _dnn_tree_combined_model_fn(features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, tree_feature_columns=None, tree_center_bias=False, use_core_versions=False): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") # Build DNN Logits. dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope(dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer, )) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer, )) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append( layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if use_core_versions: model_fn_ops = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() model_fn_ops.training_hooks.extend([ trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) ]) return model_fn_ops
def _dnn_tree_combined_model_fn( features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, tree_learner_config, num_trees, tree_examples_per_layer, config=None, dnn_optimizer="Adagrad", dnn_activation_fn=nn.relu, dnn_dropout=None, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, dnn_to_tree_distillation_param=None, use_core_versions=False, output_type=model.ModelBuilderOutputType.MODEL_FN_OPS, override_global_step_value=None): """DNN and GBDT combined model_fn. Args: features: `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) head: A `Head` instance. dnn_hidden_units: List of hidden units per layer. dnn_feature_columns: An iterable containing all the feature columns used by the model's DNN. tree_learner_config: A config for the tree learner. num_trees: Number of trees to grow model to after training DNN. tree_examples_per_layer: Number of examples to accumulate before growing the tree a layer. This value has a big impact on model quality and should be set equal to the number of examples in training dataset if possible. It can also be a function that computes the number of examples based on the depth of the layer that's being built. config: `RunConfig` of the estimator. dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN. If `None`, will use the Adagrad optimizer with default learning rate of 0.001. dnn_activation_fn: Activation function applied to each layer of the DNN. If `None`, will use `tf.nn.relu`. dnn_dropout: When not `None`, the probability to drop out a given unit in the DNN. dnn_input_layer_partitioner: Partitioner for input layer of the DNN. Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20. dnn_input_layer_to_tree: Whether to provide the DNN's input layer as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. predict_with_tree_only: Whether to use only the tree model output as the final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the float defines the weight of the distillation loss, and the loss_fn, for computing distillation loss, takes dnn_logits, tree_logits and weight tensor. If the entire tuple is None, no distillation will be applied. If only the loss_fn is None, we will take the sigmoid/softmax cross entropy loss be default. When distillation is applied, `predict_with_tree_only` will be set to True. use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. output_type: Whether to return ModelFnOps (old interface) or EstimatorSpec (new interface). override_global_step_value: If after the training is done, global step value must be reset to this value. This is particularly useful for hyper parameter tuning, which can't recognize early stopping due to the number of trees. If None, no override of global step will happen. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ if not isinstance(features, dict): raise ValueError("features should be a dictionary of `Tensor`s. " "Given type: {}".format(type(features))) if not dnn_feature_columns: raise ValueError("dnn_feature_columns must be specified") if dnn_to_tree_distillation_param: if not predict_with_tree_only: logging.warning( "update predict_with_tree_only to True since distillation" "is specified.") predict_with_tree_only = True # Build DNN Logits. dnn_parent_scope = "dnn" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and not use_core_versions): raise ValueError("You must use core versions with Estimator Spec") with variable_scope.variable_scope(dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: if use_core_versions: input_layer = feature_column_lib.input_layer( features=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope]) else: input_layer = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer, )) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer, )) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) def _dnn_train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) # Build Tree Logits. global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. name="ensemble_model") tree_features = features.copy() if dnn_input_layer_to_tree: tree_features["dnn_input_layer"] = input_layer tree_feature_columns.append( layers.real_valued_column("dnn_input_layer")) gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=tree_center_bias, examples_per_layer=tree_examples_per_layer, learner_config=tree_learner_config, feature_columns=tree_feature_columns, logits_dimension=head.logits_dimension, features=tree_features, use_core_columns=use_core_versions) with ops.name_scope("gbdt"): predictions_dict = gbdt_model.predict(mode) tree_logits = predictions_dict["predictions"] def _tree_train_op_fn(loss): """Returns the op to optimize the loss.""" if dnn_to_tree_distillation_param: loss_weight, loss_fn = dnn_to_tree_distillation_param weight_tensor = head_lib._weight_tensor( # pylint: disable=protected-access features, head.weight_column_name) dnn_logits_fixed = array_ops.stop_gradient(dnn_logits) if loss_fn is None: # we create the loss_fn similar to the head loss_fn for # multi_class_head used previously as the default one. n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn( n_classes) dnn_to_tree_distillation_loss = loss_weight * loss_fn( dnn_logits_fixed, tree_logits, weight_tensor) summary.scalar("dnn_to_tree_distillation_loss", dnn_to_tree_distillation_loss) loss += dnn_to_tree_distillation_loss update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op if predict_with_tree_only: if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER: tree_train_logits = tree_logits else: tree_train_logits = control_flow_ops.cond( global_step > dnn_steps_to_train, lambda: tree_logits, lambda: dnn_logits) else: tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """Returns a no-op.""" del loss return control_flow_ops.no_op() if tree_center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS: if use_core_versions: model_fn_ops = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( dnn_train_op).train_op tree_train_op = head.create_estimator_spec( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( tree_train_op).train_op model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_train_op = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits).train_op tree_train_op = head.create_model_fn_ops( features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits).train_op # Add the hooks model_fn_ops.training_hooks.extend([ trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, tree_train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees, override_global_step_value) ]) return model_fn_ops elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC: fusion_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) dnn_spec = head.create_estimator_spec(features=features, mode=mode, labels=labels, train_op_fn=_dnn_train_op_fn, logits=dnn_logits) tree_spec = head.create_estimator_spec(features=tree_features, mode=mode, labels=labels, train_op_fn=_tree_train_op_fn, logits=tree_train_logits) training_hooks = [ trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train, tree_spec.train_op), trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees, override_global_step_value) ] fusion_spec = fusion_spec._replace(training_hooks=training_hooks + list(fusion_spec.training_hooks)) return fusion_spec
def model_fn(features, labels, mode, params, config): head = params["head"] learner_config = params["learner_config"] examples_per_layer = params["examples_per_layer"] feature_columns = params["feature_columns"] weight_column_name = params["weight_column_name"] num_trees = params["num_trees"] use_core_libs = params["use_core_libs"] logits_modifier_function = params["logits_modifier_function"] output_leaf_index = params["output_leaf_index"] if features is None: raise ValueError("At least one feature must be specified.") if config is None: raise ValueError("Missing estimator RunConfig.") center_bias = params["center_bias"] if isinstance(features, ops.Tensor): features = {features.name: features} # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. training_features = copy.copy(features) training_features.pop(weight_column_name, None) global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. name="ensemble_model") # Create GBDT model. gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=center_bias, examples_per_layer=examples_per_layer, learner_config=learner_config, feature_columns=feature_columns, logits_dimension=head.logits_dimension, features=training_features, use_core_columns=use_core_libs, output_leaf_index=output_leaf_index) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict["predictions"] if logits_modifier_function: logits = logits_modifier_function(logits, features, mode) def _train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op create_estimator_spec_op = getattr(head, "create_estimator_spec", None) training_hooks = [] if num_trees: if center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() training_hooks.append( trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees)) if use_core_libs and callable(create_estimator_spec_op): model_fn_ops = head.create_estimator_spec( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( model_fn_ops) else: model_fn_ops = head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) if output_leaf_index and gbdt_batch.LEAF_INDEX in predictions_dict: model_fn_ops.predictions[gbdt_batch.LEAF_INDEX] = predictions_dict[ gbdt_batch.LEAF_INDEX] model_fn_ops.training_hooks.extend(training_hooks) return model_fn_ops
def model_builder(features, labels, mode, params, config): """Multi-machine batch gradient descent tree model. Args: features: `Tensor` or `dict` of `Tensor` objects. labels: Labels used to train on. mode: Mode we are in. (TRAIN/EVAL/INFER) params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * learner_config: A config for the learner. * feature_columns: An iterable containing all the feature columns used by the model. * examples_per_layer: Number of examples to accumulate before growing a layer. It can also be a function that computes the number of examples based on the depth of the layer that's being built. * weight_column_name: The name of weight column. * center_bias: Whether a separate tree should be created for first fitting the bias. config: `RunConfig` of the estimator. Returns: A `ModelFnOps` object. Raises: ValueError: if inputs are not valid. """ head = params["head"] learner_config = params["learner_config"] examples_per_layer = params["examples_per_layer"] feature_columns = params["feature_columns"] weight_column_name = params["weight_column_name"] num_trees = params["num_trees"] if features is None: raise ValueError("At least one feature must be specified.") if config is None: raise ValueError("Missing estimator RunConfig.") center_bias = params["center_bias"] # Make a shallow copy of features to ensure downstream usage # is unaffected by modifications in the model function. training_features = copy.copy(features) training_features.pop(weight_column_name, None) global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", # Initialize an empty ensemble. name="ensemble_model") # Create GBDT model. gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( is_chief=config.is_chief, num_ps_replicas=config.num_ps_replicas, ensemble_handle=ensemble_handle, center_bias=center_bias, examples_per_layer=examples_per_layer, learner_config=learner_config, feature_columns=feature_columns, features=features) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict["predictions"] def _train_op_fn(loss): """Returns the op to optimize the loss.""" update_op = gbdt_model.train(loss, predictions_dict, labels) with ops.control_dependencies( [update_op]), (ops.colocate_with(global_step)): update_op = state_ops.assign_add(global_step, 1).op return update_op model_fn_ops = head.create_model_fn_ops(features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits) if num_trees: if center_bias: num_trees += 1 finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor( ) model_fn_ops.training_hooks.append( trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees)) return model_fn_ops