def _get_transformed_features(features, feature_columns):
  """Transforms `features` through bucketized columns into a sorted list.

  Args:
    features: a dictionary of name to Tensor.
    feature_columns: a list/set of tf.feature_column. Every entry must be a
      bucketized column.

  Returns:
    A list of the transformed feature tensors, ordered by column name. Each
    tensor is the column's transformed output squeezed along axis 1.

  Raises:
    ValueError: when a column is not a bucketized column, or when a
      transformed feature still has rank greater than 1 after the squeeze.
  """
  # pylint:disable=protected-access
  for candidate in feature_columns:
    if isinstance(candidate, feature_column_lib._BucketizedColumn):
      continue
    raise ValueError(
        'For now, only bucketized_column is supported but '
        'got: {}'.format(candidate))
  transformed = feature_column_lib._transform_features(
      features, feature_columns)
  # pylint:enable=protected-access

  def _column_name(col):
    return col.name

  outputs = []
  for col in sorted(transformed, key=_column_name):
    squeezed = array_ops.squeeze(transformed[col], axis=1)
    if len(squeezed.shape) > 1:
      # Report the shape of the raw input feature behind the column.
      origin = col.source_column.name
      raise ValueError(
          'For now, only supports features equivalent to rank 1 '
          'but column `{}` got: {}'.format(origin, features[origin].shape))
    outputs.append(squeezed)
  return outputs
def _get_transformed_features(features, feature_columns):
  """Applies bucketized feature columns to `features`.

  Args:
    features: a dictionary of name to Tensor.
    feature_columns: a list/set of tf.feature_column; only bucketized columns
      are accepted.

  Returns:
    result_features: a list of the transformed features, sorted by the column
      name, each squeezed along axis 1.

  Raises:
    ValueError: when a non-bucketized column is passed, or when a squeezed
      feature has rank greater than 1.
  """
  # pylint:disable=protected-access
  for fc in feature_columns:
    is_bucketized = isinstance(fc, feature_column_lib._BucketizedColumn)
    if not is_bucketized:
      message = ('For now, only bucketized_column is supported but '
                 'got: {}').format(fc)
      raise ValueError(message)
  column_to_tensor = feature_column_lib._transform_features(
      features, feature_columns)
  # pylint:enable=protected-access

  # Fix the output order by column name so it does not depend on the order
  # of the mapping returned by the transform.
  ordered_columns = list(column_to_tensor)
  ordered_columns.sort(key=lambda col: col.name)

  result_features = []
  for col in ordered_columns:
    input_name = col.source_column.name
    flattened = array_ops.squeeze(column_to_tensor[col], axis=1)
    rank = len(flattened.shape)
    if rank > 1:
      message = ('For now, only supports features equivalent to rank 1 '
                 'but column `{}` got: {}').format(
                     input_name, features[input_name].shape)
      raise ValueError(message)
    result_features.append(flattened)
  return result_features
def _get_transformed_features(features, sorted_feature_columns):
  """Transforms `features` with bucketized and indicator columns.

  Args:
    features: a dictionary of name to Tensor.
    sorted_feature_columns: a list of tf.feature_column, sorted by name.

  Returns:
    A list of transformed feature tensors, following the order of
    `sorted_feature_columns`. A bucketized column contributes one tensor (its
    transformed output squeezed along axis 1). An indicator column
    contributes one int32 tensor per slice along axis 1.

  Raises:
    ValueError: when a column is neither a bucketized nor an indicator
      column, or when a transformed feature has an unsupported rank.
  """
  # pylint:disable=protected-access
  transformed = feature_column_lib._transform_features(
      features, sorted_feature_columns)

  outputs = []
  for col in sorted_feature_columns:
    if isinstance(col, feature_column_lib._BucketizedColumn):
      squeezed = array_ops.squeeze(transformed[col], axis=1)
      if len(squeezed.shape) > 1:
        origin = col.source_column.name
        raise ValueError(
            'For now, only supports features equivalent to rank 1 '
            'but column `{}` got: {}'.format(origin, features[origin].shape))
      outputs.append(squeezed)
      continue

    if isinstance(col, feature_column_lib._IndicatorColumn):
      as_int = math_ops.to_int32(transformed[col])
      if len(as_int.shape) > 2:
        origin = col.categorical_column.name
        raise ValueError(
            'Rank of indicator column must be no more than 2, '
            'but column `{}` got: {}'.format(origin, features[origin].shape))
      # Each slice along axis 1 becomes its own feature.
      outputs.extend(array_ops.unstack(as_int, axis=1))
      continue

    raise ValueError(
        'For now, only bucketized_column and indicator_column is supported '
        'but got: {}'.format(col))
  # pylint:enable=protected-access
  return outputs
def _get_transformed_features(features, sorted_feature_columns):
  """Gets the transformed features for bucketized/indicator columns.

  Args:
    features: a dictionary of name to Tensor.
    sorted_feature_columns: a list of tf.feature_column, sorted by name.

  Returns:
    result_features: a list of the transformed features, in the order of
      `sorted_feature_columns`. Bucketized columns yield a single tensor
      squeezed along axis 1; indicator columns are cast to int32 and
      unstacked along axis 1 into one tensor per slice.

  Raises:
    ValueError: when unsupported features/columns are tried.
  """

  def _from_bucketized(column, tensor):
    """Returns the single squeezed tensor of a bucketized column as a list."""
    input_name = column.source_column.name
    flattened = array_ops.squeeze(tensor, axis=1)
    if len(flattened.shape) > 1:
      raise ValueError('For now, only supports features equivalent to rank 1 '
                       'but column `{}` got: {}'.format(
                           input_name, features[input_name].shape))
    return [flattened]

  def _from_indicator(column, tensor):
    """Returns the int32 per-slice tensors of an indicator column."""
    input_name = column.categorical_column.name
    int_tensor = math_ops.to_int32(tensor)
    if len(int_tensor.shape) > 2:
      raise ValueError('Rank of indicator column must be no more than 2, '
                       'but column `{}` got: {}'.format(
                           input_name, features[input_name].shape))
    return array_ops.unstack(int_tensor, axis=1)

  # pylint:disable=protected-access
  column_to_tensor = feature_column_lib._transform_features(
      features, sorted_feature_columns)
  result_features = []
  for column in sorted_feature_columns:
    if isinstance(column, feature_column_lib._BucketizedColumn):
      result_features.extend(
          _from_bucketized(column, column_to_tensor[column]))
    elif isinstance(column, feature_column_lib._IndicatorColumn):
      result_features.extend(
          _from_indicator(column, column_to_tensor[column]))
    else:
      raise ValueError(
          'For now, only bucketized_column and indicator_column is supported '
          'but got: {}'.format(column))
  # pylint:enable=protected-access
  return result_features
def _model_fn(features, labels, mode):
  """Function that returns predictions, training loss, and training op.

  This is a closure: besides its arguments it reads configuration such as
  `feature_columns`, `output_type`, `params`, `model_head` and the various
  flags from the enclosing scope.

  Args:
    features: a Tensor, a SparseTensor, or a dict of name to tensor. A bare
      tensor is wrapped as `{'features': tensor}`.
    labels: the labels, or None (e.g. when doing prediction).
    mode: one of `model_fn_lib.ModeKeys`.

  Returns:
    The object built by `model_head.create_model_fn_ops` when `output_type`
    is `ModelBuilderOutputType.MODEL_FN_OPS`, otherwise the one built by
    `model_head.create_estimator_spec`; in both cases extended with the
    training hooks and the extra prediction entries added below.
  """
  if isinstance(features, (ops.Tensor, sparse_tensor.SparseTensor)):
    features = {'features': features}
  if feature_columns:
    features = features.copy()

    if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
      features.update(layers.transform_features(features, feature_columns))
    else:
      for fc in feature_columns:
        tensor = fc_core._transform_features(features, [fc])[fc]  # pylint: disable=protected-access
        features[fc.name] = tensor

  # Pull weights and keys out of `features` so that they are not handed to
  # the graph builder as inputs.
  weights = None
  if weights_name and weights_name in features:
    weights = features.pop(weights_name)

  keys = None
  if keys_name and keys_name in features:
    keys = features.pop(keys_name)

  # If we're doing eval, optionally ignore device_assigner.
  # Also ignore device assigner if we're exporting (mode == INFER)
  dev_assn = device_assigner
  if (mode == model_fn_lib.ModeKeys.INFER or
      (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
    dev_assn = None

  graph_builder = graph_builder_class(params, device_assigner=dev_assn)

  logits, tree_paths, regression_variance = graph_builder.inference_graph(
      features)

  summary.scalar('average_tree_size', graph_builder.average_size())
  # For binary classification problems, convert probabilities to logits.
  # Includes hack to get around the fact that a probability might be 0 or 1.
  if not params.regression and params.num_classes == 2:
    class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
    logits = math_ops.log(
        math_ops.maximum(
            class_1_probs / math_ops.maximum(1.0 - class_1_probs, EPSILON),
            EPSILON))

  # labels might be None if we're doing prediction (which brings up the
  # question of why we force everything to adhere to a single model_fn).
  training_graph = None
  if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
    with ops.control_dependencies([logits.op]):
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(
              features,
              labels,
              input_weights=weights,
              num_trainers=num_trainers,
              trainer_id=trainer_id),
          state_ops.assign_add(training_util.get_global_step(), 1))

  # Put weights back in
  if weights is not None:
    features[weights_name] = weights

  # TensorForest's training graph isn't calculated directly from the loss
  # like many other models.
  def _train_fn(unused_loss):
    return training_graph

  # Ops are run in lexicographical order of their keys. Run the resource
  # clean-up op last.
  all_handles = graph_builder.get_all_resource_handles()
  ops_at_end = {
      '9: clean up resources':
          control_flow_ops.group(*[
              resource_variable_ops.destroy_resource_op(handle)
              for handle in all_handles
          ])
  }

  if report_feature_importances:
    ops_at_end['1: feature_importances'] = (
        graph_builder.feature_importances())

  training_hooks = [TensorForestRunOpAtEndHook(ops_at_end)]

  if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
    model_ops = model_head.create_model_fn_ops(
        features=features,
        labels=labels,
        mode=mode,
        train_op_fn=_train_fn,
        logits=logits,
        scope=head_scope)

    if early_stopping_rounds:
      training_hooks.append(
          TensorForestLossHook(
              early_stopping_rounds,
              early_stopping_loss_threshold=early_stopping_loss_threshold,
              loss_op=model_ops.loss))

    model_ops.training_hooks.extend(training_hooks)

    if keys is not None:
      model_ops.predictions[keys_name] = keys

    if params.inference_tree_paths:
      model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

    model_ops.predictions[VARIANCE_PREDICTION_KEY] = regression_variance

    if include_all_in_serving:
      # In order to serve the variance we need to add the prediction dict
      # to output_alternatives dict.
      if not model_ops.output_alternatives:
        model_ops.output_alternatives = {}
      model_ops.output_alternatives[ALL_SERVING_KEY] = (
          constants.ProblemType.UNSPECIFIED, model_ops.predictions)

    return model_ops

  # Estimator spec
  estimator_spec = model_head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_fn,
      logits=logits)

  if early_stopping_rounds:
    training_hooks.append(
        TensorForestLossHook(
            early_stopping_rounds,
            early_stopping_loss_threshold=early_stopping_loss_threshold,
            loss_op=estimator_spec.loss))

  estimator_spec = estimator_spec._replace(
      training_hooks=training_hooks + list(estimator_spec.training_hooks))

  if keys is not None:
    estimator_spec.predictions[keys_name] = keys

  if params.inference_tree_paths:
    estimator_spec.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

  estimator_spec.predictions[VARIANCE_PREDICTION_KEY] = regression_variance

  if include_all_in_serving:
    # In order to serve the variance we need to add the prediction dict to
    # the export outputs. Copy the outputs created by the head (if any) and
    # add to them, so the head's own signatures are kept instead of being
    # replaced, mirroring the MODEL_FN_OPS branch above.
    outputs = dict(estimator_spec.export_outputs or {})
    outputs[ALL_SERVING_KEY] = PredictOutput(estimator_spec.predictions)
    estimator_spec = estimator_spec._replace(export_outputs=outputs)

  return estimator_spec
def extract_features(features, feature_columns, use_core_columns):
  """Extracts columns from a dictionary of features.

  Args:
    features: `dict` of `Tensor` objects.
    feature_columns: A list of feature_columns. When empty or None, the
      entries of `features` are used as they are.
    use_core_columns: If true, every column is transformed with
      `fc_core._transform_features`. Otherwise embedding columns go through
      `fc_core.input_layer` and all other columns through
      `feature_column_ops.transform_features`.

  Returns:
    Eight values:
      - A list of all feature column names.
      - A list of dense floats.
      - A list of sparse float feature indices.
      - A list of sparse float feature values.
      - A list of sparse float feature shapes.
      - A list of sparse int feature indices.
      - A list of sparse int feature values.
      - A list of sparse int feature shapes.

  Raises:
    ValueError: if features is not valid: it is empty, a column does not
      transform into exactly one output, or a feature has an unsupported
      dtype.
  """
  if not features:
    raise ValueError("Features dictionary must be specified.")

  # Make a shallow copy of features to ensure downstream usage
  # is unaffected by modifications in the model function.
  features = copy.copy(features)
  if feature_columns:
    scope = "gbdt"
    with variable_scope.variable_scope(scope):
      feature_columns = list(feature_columns)
      transformed_features = collections.OrderedDict()
      for fc in feature_columns:
        # pylint: disable=protected-access
        if use_core_columns:
          tensor = fc_core._transform_features(features, [fc])[fc]
          transformed_features[fc.name] = tensor
        elif isinstance(fc, feature_column_lib._EmbeddingColumn):
          transformed_features[fc.name] = fc_core.input_layer(
              features, [fc], weight_collections=[scope])
        else:
          result = feature_column_ops.transform_features(features, [fc])
          # Exactly one output is expected per column; an empty result is
          # reported the same way as too many instead of as an IndexError.
          if len(result) != 1:
            raise ValueError("Unexpected number of output features")
          transformed_features[fc.name] = next(iter(result.values()))
        # pylint: enable=protected-access
      features = transformed_features

  dense_float_names = []
  dense_floats = []
  sparse_float_names = []
  sparse_float_indices = []
  sparse_float_values = []
  sparse_float_shapes = []
  sparse_int_names = []
  sparse_int_indices = []
  sparse_int_values = []
  sparse_int_shapes = []
  # Iterate in sorted key order so the output ordering is deterministic.
  for key in sorted(features):
    tensor = features[key]
    if isinstance(tensor, sparse_tensor.SparseTensor):
      if tensor.values.dtype == dtypes.float32:
        sparse_float_names.append(key)
        sparse_float_indices.append(tensor.indices)
        sparse_float_values.append(tensor.values)
        sparse_float_shapes.append(tensor.dense_shape)
      elif tensor.values.dtype == dtypes.int64:
        sparse_int_names.append(key)
        sparse_int_indices.append(tensor.indices)
        sparse_int_values.append(tensor.values)
        sparse_int_shapes.append(tensor.dense_shape)
      else:
        raise ValueError("Unsupported sparse feature %s with dtype %s." %
                         (tensor.indices.name, tensor.dtype))
    else:
      if tensor.dtype == dtypes.float32:
        if len(tensor.shape) > 1 and tensor.shape[1] > 1:
          # Split a multi-column dense feature into one single-column
          # feature per slice along axis 1, each with its own name.
          unstacked = array_ops.unstack(tensor, axis=1)
          for i, column_slice in enumerate(unstacked):
            dense_float_names.append(_FEATURE_NAME_TEMPLATE % (key, i))
            dense_floats.append(array_ops.reshape(column_slice, [-1, 1]))
        else:
          dense_float_names.append(key)
          dense_floats.append(tensor)
      else:
        raise ValueError("Unsupported dense feature %s with dtype %s." %
                         (tensor.name, tensor.dtype))
  # Feature columns are logically organized into incrementing slots starting
  # from dense floats, then sparse floats then sparse ints.
  fc_names = (dense_float_names + sparse_float_names + sparse_int_names)
  return (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
          sparse_float_shapes, sparse_int_indices, sparse_int_values,
          sparse_int_shapes)
def _model_fn(features, labels, mode):
  """Function that returns predictions, training loss, and training op.

  This is a closure: besides its arguments it reads configuration such as
  `feature_columns`, `output_type`, `params`, `model_head` and the various
  flags from the enclosing scope.

  Args:
    features: a Tensor, a SparseTensor, or a dict of name to tensor. A bare
      tensor is wrapped as `{'features': tensor}`.
    labels: the labels, or None (e.g. when doing prediction).
    mode: one of `model_fn_lib.ModeKeys`.

  Returns:
    The object built by `model_head.create_model_fn_ops` when `output_type`
    is `ModelBuilderOutputType.MODEL_FN_OPS`, otherwise the one built by
    `model_head.create_estimator_spec`; in both cases extended with the
    training hooks and the extra prediction entries added below.
  """
  if isinstance(features, (ops.Tensor, sparse_tensor.SparseTensor)):
    features = {'features': features}
  if feature_columns:
    features = features.copy()

    if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
      features.update(layers.transform_features(features, feature_columns))
    else:
      for fc in feature_columns:
        tensor = fc_core._transform_features(features, [fc])[fc]  # pylint: disable=protected-access
        features[fc.name] = tensor

  # Pull weights and keys out of `features` so that they are not handed to
  # the graph builder as inputs.
  weights = None
  if weights_name and weights_name in features:
    weights = features.pop(weights_name)

  keys = None
  if keys_name and keys_name in features:
    keys = features.pop(keys_name)

  # If we're doing eval, optionally ignore device_assigner.
  # Also ignore device assigner if we're exporting (mode == INFER)
  dev_assn = device_assigner
  if (mode == model_fn_lib.ModeKeys.INFER or
      (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
    dev_assn = None

  graph_builder = graph_builder_class(params, device_assigner=dev_assn)

  logits, tree_paths, regression_variance = graph_builder.inference_graph(
      features)

  summary.scalar('average_tree_size', graph_builder.average_size())
  # For binary classification problems, convert probabilities to logits.
  # Includes hack to get around the fact that a probability might be 0 or 1.
  if not params.regression and params.num_classes == 2:
    class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
    logits = math_ops.log(
        math_ops.maximum(
            class_1_probs / math_ops.maximum(1.0 - class_1_probs, EPSILON),
            EPSILON))

  # labels might be None if we're doing prediction (which brings up the
  # question of why we force everything to adhere to a single model_fn).
  training_graph = None
  if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
    with ops.control_dependencies([logits.op]):
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(
              features,
              labels,
              input_weights=weights,
              num_trainers=num_trainers,
              trainer_id=trainer_id),
          state_ops.assign_add(training_util.get_global_step(), 1))

  # Put weights back in
  if weights is not None:
    features[weights_name] = weights

  # TensorForest's training graph isn't calculated directly from the loss
  # like many other models.
  def _train_fn(unused_loss):
    return training_graph

  # Ops are run in lexicographical order of their keys. Run the resource
  # clean-up op last.
  all_handles = graph_builder.get_all_resource_handles()
  ops_at_end = {
      '9: clean up resources':
          control_flow_ops.group(*[
              resource_variable_ops.destroy_resource_op(handle)
              for handle in all_handles
          ])
  }

  if report_feature_importances:
    ops_at_end['1: feature_importances'] = (
        graph_builder.feature_importances())

  training_hooks = [TensorForestRunOpAtEndHook(ops_at_end)]

  if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
    model_ops = model_head.create_model_fn_ops(
        features=features,
        labels=labels,
        mode=mode,
        train_op_fn=_train_fn,
        logits=logits,
        scope=head_scope)

    if early_stopping_rounds:
      training_hooks.append(
          TensorForestLossHook(
              early_stopping_rounds,
              early_stopping_loss_threshold=early_stopping_loss_threshold,
              loss_op=model_ops.loss))

    model_ops.training_hooks.extend(training_hooks)

    if keys is not None:
      model_ops.predictions[keys_name] = keys

    if params.inference_tree_paths:
      model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

    model_ops.predictions[VARIANCE_PREDICTION_KEY] = regression_variance

    if include_all_in_serving:
      # In order to serve the variance we need to add the prediction dict
      # to output_alternatives dict.
      if not model_ops.output_alternatives:
        model_ops.output_alternatives = {}
      model_ops.output_alternatives[ALL_SERVING_KEY] = (
          constants.ProblemType.UNSPECIFIED, model_ops.predictions)

    return model_ops

  # Estimator spec
  estimator_spec = model_head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_fn,
      logits=logits)

  if early_stopping_rounds:
    training_hooks.append(
        TensorForestLossHook(
            early_stopping_rounds,
            early_stopping_loss_threshold=early_stopping_loss_threshold,
            loss_op=estimator_spec.loss))

  estimator_spec = estimator_spec._replace(
      training_hooks=training_hooks + list(estimator_spec.training_hooks))

  if keys is not None:
    estimator_spec.predictions[keys_name] = keys

  if params.inference_tree_paths:
    estimator_spec.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

  estimator_spec.predictions[VARIANCE_PREDICTION_KEY] = regression_variance

  if include_all_in_serving:
    # In order to serve the variance we need to add the prediction dict to
    # the export outputs. Copy the outputs created by the head (if any) and
    # add to them, so the head's own signatures are kept instead of being
    # replaced, mirroring the MODEL_FN_OPS branch above.
    outputs = dict(estimator_spec.export_outputs or {})
    outputs[ALL_SERVING_KEY] = PredictOutput(estimator_spec.predictions)
    estimator_spec = estimator_spec._replace(export_outputs=outputs)

  return estimator_spec