def _model_fn(features, labels, mode):
    """Build the k-means graph for train/eval/predict.

    Note: `self` is captured from the enclosing scope — this function is
    defined inside a method and handed to an Estimator.

    Args:
      features: Input points; a tensor or a dict of tensors.
      labels: Must be None; k-means is unsupervised.
      mode: An Estimator `ModeKeys` value.

    Returns:
      A `ModelFnOps` with predictions, eval metrics, loss and train op.
    """
    assert labels is None, labels
    (all_scores, model_predictions, losses,
     training_op) = clustering_ops.KMeans(
         self._parse_tensor_or_dict(features),
         self._num_clusters,
         self._training_initial_clusters,
         self._distance_metric,
         self._use_mini_batch,
         random_seed=self._random_seed,
         kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries
     ).training_graph()
    incr_step = state_ops.assign_add(variables.get_global_step(), 1)
    loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
    logging_ops.scalar_summary('loss/raw', loss)
    # Running the train op also advances the global step; the combined op
    # yields the loss value.
    training_op = with_dependencies([training_op, incr_step], loss)
    predictions = {
        KMeansClustering.ALL_SCORES: all_scores[0],
        KMeansClustering.CLUSTER_IDX: model_predictions[0],
    }
    eval_metric_ops = {KMeansClustering.SCORES: loss}
    return ModelFnOps(
        mode=mode,
        predictions=predictions,
        eval_metric_ops=eval_metric_ops,
        loss=loss,
        train_op=training_op)
def inference(inp, num_clusters, hidden1_units, hidden2_units):
    """Build the MNIST model up to where it may be used for inference.

    Args:
      inp: input data
      num_clusters: number of clusters of input features to train.
      hidden1_units: Size of the first hidden layer.
      hidden2_units: Size of the second hidden layer.

    Returns:
      logits: Output tensor with the computed logits.
      clustering_loss: Clustering loss.
      kmeans_training_op: An op to train the clustering.
    """
    # Clustering.
    kmeans = clustering_ops.KMeans(
        inp,
        num_clusters,
        distance_metric=clustering_ops.COSINE_DISTANCE,
        # TODO(agarwal): kmeans++ is currently causing crash in dbg mode.
        # Enable this after fixing.
        # initial_clusters=clustering_ops.KMEANS_PLUS_PLUS_INIT,
        use_mini_batch=True)
    (all_scores, _, clustering_scores,
     kmeans_training_op) = kmeans.training_graph()
    # Some heuristics to approximately whiten this output.
    all_scores = (all_scores[0] - 0.5) * 5
    # Stop gradients here so the supervised objective below cannot move
    # the cluster centers.
    all_scores = tf.stop_gradient(all_scores)
    clustering_loss = tf.reduce_sum(clustering_scores[0])
    # Hidden layer 1.
    # NOTE(review): the stddev uses IMAGE_PIXELS although the input dim of
    # this layer is num_clusters — looks intentional (whitening heuristic
    # above) but worth confirming.
    with tf.name_scope('hidden1'):
        w1 = tf.Variable(
            tf.truncated_normal([num_clusters, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        b1 = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(all_scores, w1) + b1)
    # Hidden layer 2.
    with tf.name_scope('hidden2'):
        w2 = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        b2 = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, w2) + b2)
    # Linear output layer (no activation; softmax is applied by the loss).
    with tf.name_scope('softmax_linear'):
        w_out = tf.Variable(
            tf.truncated_normal([hidden2_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        b_out = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, w_out) + b_out
    return logits, clustering_loss, kmeans_training_op
def kmeans_cluster_model_fn(features, labels, mode, params, config):
    """Model function for KMeansClustering estimator.

    Args:
      features: Input points; a tensor or a dict of tensors.
      labels: Must be None; k-means is unsupervised.
      mode: An Estimator `ModeKeys` value.
      params: Hyperparameter dict; reads 'num_clusters', 'random_seed',
        'kmeans_plus_plus_num_retries' and optional 'relative_tolerance'.
      config: A `RunConfig`; `config.is_chief` selects the worker that
        runs cluster initialization.

    Returns:
      A `ModelFnOps` with predictions, eval metrics, loss, train op and
      training hooks.
    """
    # https://wiki.python.org/moin/UsingAssertionsEffectively
    # Use %-formatting for the assertion message: `labels` may be a
    # non-string (e.g. a Tensor), and string concatenation with `+` would
    # raise TypeError instead of the intended AssertionError.
    assert labels is None, 'labels are not needed: %s' % (labels,)

    # clustering_ops.KMeans holds the core algorithm implementation:
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow
    # /contrib/factorization/python/ops/clustering_ops.py
    (all_scores, model_predictions, losses, is_initialized,
     cluster_centers_var, init_op, training_op) = clustering_ops.KMeans(
         _parse_tensor_or_dict(features),
         params.get('num_clusters'),
         initial_clusters=clustering_ops.RANDOM_INIT,
         distance_metric=clustering_ops.SQUARED_EUCLIDEAN_DISTANCE,
         # Mini-batch updates are deliberately disabled here; full-batch
         # Lloyd iterations are used regardless of params.
         use_mini_batch=False,
         mini_batch_steps_per_iteration=1,
         # use_mini_batch=params.get('use_mini_batch'),
         # mini_batch_steps_per_iteration=params.get(
         #     'mini_batch_steps_per_iteration'),
         random_seed=params.get('random_seed'),
         kmeans_plus_plus_num_retries=params.get(
             'kmeans_plus_plus_num_retries')).training_graph()

    incr_step = state_ops.assign_add(variables.get_global_step(), 1)
    loss = math_ops.reduce_sum(losses, name='kmeans_loss')
    # Outputs a Summary protocol buffer containing a single scalar value,
    # used for visualizing in TensorBoard.
    summary.scalar('loss/raw', loss)
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow
    # /python/ops/control_flow_ops.py
    # with_dependencies(dependencies, output_tensor, name=None):
    # produces the content of `output_tensor` only after `dependencies`
    # have run — so every training step also bumps the global step.
    training_op = with_dependencies([training_op, incr_step], loss)
    predictions = {
        'all_scores': all_scores[0],
        'cluster_idx': model_predictions[0],
    }
    eval_metric_ops = {'scores': loss}
    # Hooks for monitored training: the chief initializes the clusters;
    # optionally stop when the relative loss change drops below tolerance.
    training_hooks = [
        _InitializeClustersHook(init_op, is_initialized, config.is_chief)
    ]
    relative_tolerance = params.get('relative_tolerance')
    if relative_tolerance is not None:
        training_hooks.append(_LossRelativeChangeHook(relative_tolerance))
    return ModelFnOps(
        mode=mode,
        predictions=predictions,
        eval_metric_ops=eval_metric_ops,
        loss=loss,
        train_op=training_op,
        training_hooks=training_hooks)
def _get_eval_ops(self, features, _, unused_metrics):
    """Return eval ops: the summed clustering score over `features`."""
    # Only the per-point losses are needed for evaluation; the remaining
    # training_graph outputs are discarded.
    kmeans = clustering_ops.KMeans(
        features,
        self._num_clusters,
        self._training_initial_clusters,
        self._distance_metric,
        self._use_mini_batch,
        random_seed=self._random_seed,
        kmeans_plus_plus_num_retries=self.kmeans_plus_plus_num_retries)
    _, _, losses, _ = kmeans.training_graph()
    return {KMeansClustering.SCORES: tf.reduce_sum(losses)}
def _get_predict_ops(self, features):
    """Return predict ops: per-cluster scores and cluster assignments."""
    kmeans = clustering_ops.KMeans(
        features,
        self._num_clusters,
        self._training_initial_clusters,
        self._distance_metric,
        self._use_mini_batch,
        random_seed=self._random_seed,
        kmeans_plus_plus_num_retries=self.kmeans_plus_plus_num_retries)
    all_scores, model_predictions, _, _ = kmeans.training_graph()
    return {
        KMeansClustering.ALL_SCORES: all_scores[0],
        KMeansClustering.CLUSTER_IDX: model_predictions[0],
    }
def _get_train_ops(self, features, _):
    """Return (train_op, loss) for one clustering training step."""
    kmeans = clustering_ops.KMeans(
        features,
        self._num_clusters,
        self._training_initial_clusters,
        self._distance_metric,
        self._use_mini_batch,
        random_seed=self._random_seed,
        kmeans_plus_plus_num_retries=self.kmeans_plus_plus_num_retries)
    _, _, losses, training_op = kmeans.training_graph()
    incr_step = tf.assign_add(tf.contrib.framework.get_global_step(), 1)
    # Cache the loss on the instance so other methods can observe it.
    self._loss = tf.reduce_sum(losses)
    # Advance the global step as part of every training step.
    training_op = with_dependencies([training_op, incr_step], self._loss)
    return training_op, self._loss
def _build_estimator(self, X=None):
    """Lazily construct the k-means graph; a no-op if already built."""
    if self._estimator_built:
        return
    if self.num_features is None:
        self.num_features = get_num_features(X)
    # Reload params from checkpoint if available.
    if self._to_be_restored and self.num_features is None:
        self.num_features = read_tensor_in_checkpoint(
            'num_features', self._to_be_restored)
    if self._to_be_restored and self.num_classes is None:
        self.num_classes = read_tensor_in_checkpoint(
            'num_classes', self._to_be_restored)
    # Purity checks.
    if self.num_features is None:
        raise ValueError("'num_features' cannot be None.")
    # Persistent parameters: store num_features in the graph so a later
    # restore can recover it.
    tf.Variable(self.num_features, dtype=tf.int32, name='num_features')
    self._kmeans = c_ops.KMeans(
        X,
        self.n_clusters,
        initial_clusters=self.init,
        distance_metric=self.distance,
        use_mini_batch=self.use_mini_batch)
    (self._all_scores, self._cluster_idx, self._scores,
     self._cluster_centers_initialized, self._cluster_centers_vars,
     self._init_op, self._train_op) = self._kmeans.training_graph()
    # training_graph() returns cluster_idx as a 1-tuple; unwrap it.
    self._cluster_idx = self._cluster_idx[0]
    self.avg_distance = tf.reduce_mean(self._scores)
    self._estimator_built = True
    self._init_graph()
def _kmeans_clustering_model_fn(features, labels, mode, params, config):
    """Model function for KMeansClustering estimator.

    Args:
      features: Input points; a tensor or a dict of tensors.
      labels: Must be None; k-means is unsupervised.
      mode: An Estimator `ModeKeys` value.
      params: Hyperparameter dict driving the clustering graph.
      config: A `RunConfig`; `config.is_chief` selects the initializer.

    Returns:
      A `ModelFnOps` with predictions, eval metrics, loss, train op and
      training hooks.
    """
    assert labels is None, labels
    (all_scores, model_predictions, losses, is_initialized, init_op,
     training_op) = clustering_ops.KMeans(
         _parse_tensor_or_dict(features),
         params.get('num_clusters'),
         initial_clusters=params.get('training_initial_clusters'),
         distance_metric=params.get('distance_metric'),
         use_mini_batch=params.get('use_mini_batch'),
         mini_batch_steps_per_iteration=params.get(
             'mini_batch_steps_per_iteration'),
         random_seed=params.get('random_seed'),
         kmeans_plus_plus_num_retries=params.get(
             'kmeans_plus_plus_num_retries')).training_graph()
    incr_step = state_ops.assign_add(variables.get_global_step(), 1)
    loss = math_ops.reduce_sum(losses, name=KMeansClustering.LOSS_OP_NAME)
    summary.scalar('loss/raw', loss)
    # Each training step also increments the global step; the combined op
    # yields the loss.
    training_op = with_dependencies([training_op, incr_step], loss)
    predictions = {
        KMeansClustering.ALL_SCORES: all_scores[0],
        KMeansClustering.CLUSTER_IDX: model_predictions[0],
    }
    eval_metric_ops = {KMeansClustering.SCORES: loss}
    # The chief runs init_op until the clusters are initialized; other
    # workers wait on is_initialized.
    training_hooks = [
        _InitializeClustersHook(init_op, is_initialized, config.is_chief)
    ]
    relative_tolerance = params.get('relative_tolerance')
    if relative_tolerance is not None:
        training_hooks.append(_LossRelativeChangeHook(relative_tolerance))
    return ModelFnOps(
        mode=mode,
        predictions=predictions,
        eval_metric_ops=eval_metric_ops,
        loss=loss,
        train_op=training_op,
        training_hooks=training_hooks)
def model_fn(self, features, mode, config):
    """Model function for the estimator.

    Note that this does not take a `labels` arg. This works, but `input_fn`
    must return either `features` or, equivalently, `(features, None)`.

    Args:
      features: The input points. See `tf.estimator.Estimator`.
      mode: See `tf.estimator.Estimator`.
      config: See `tf.estimator.Estimator`.

    Returns:
      A `tf.estimator.EstimatorSpec` (see `tf.estimator.Estimator`)
      specifying this behavior:
        * `train_op`: Execute one mini-batch or full-batch run of Lloyd's
          algorithm.
        * `loss`: The sum of the squared distances from each input point to
          its closest center.
        * `eval_metric_ops`: Maps `SCORE` to `loss`.
        * `predictions`: Maps `ALL_DISTANCES` to the distance from each
          input point to each cluster center; maps `CLUSTER_INDEX` to the
          index of the closest cluster center for each input point.
    """
    # `input_points` is a single Tensor, so the sharding functionality in
    # clustering_ops goes unused and several outputs below are lists with
    # a single item.
    input_points = _parse_features_if_necessary(features,
                                                self._feature_columns)
    # With N input points:
    #   all_distances: one (N, num_clusters) matrix; distance from each
    #     point to each cluster center.
    #   model_predictions: one length-N vector of cluster ids.
    #   losses: like model_predictions but gives the distance to the
    #     cluster center.
    #   is_initialized / init_op: a single worker repeatedly executes
    #     init_op until is_initialized becomes True.
    #   training_op: one iteration of training (an entire Lloyd iteration
    #     or a mini-batch of one); multiple workers may execute it, but
    #     only after is_initialized becomes True.
    (all_distances, model_predictions, losses, is_initialized, init_op,
     training_op) = clustering_ops.KMeans(
         inputs=input_points,
         num_clusters=self._num_clusters,
         initial_clusters=self._initial_clusters,
         distance_metric=self._distance_metric,
         use_mini_batch=self._use_mini_batch,
         mini_batch_steps_per_iteration=self._mini_batch_steps_per_iteration,
         random_seed=self._random_seed,
         kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries
     ).training_graph()
    loss = math_ops.reduce_sum(losses)
    summary.scalar('loss/raw', loss)
    incr_step = state_ops.assign_add(training_util.get_global_step(), 1)
    training_op = control_flow_ops.with_dependencies(
        [training_op, incr_step], loss)
    training_hooks = [
        _InitializeClustersHook(init_op, is_initialized, config.is_chief)
    ]
    if self._relative_tolerance is not None:
        training_hooks.append(
            _LossRelativeChangeHook(loss, self._relative_tolerance))
    cluster_index = model_predictions[0]
    export_outputs = {
        KMeansClustering.ALL_DISTANCES:
            export_output.PredictOutput(all_distances[0]),
        KMeansClustering.CLUSTER_INDEX:
            export_output.PredictOutput(cluster_index),
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            export_output.PredictOutput(cluster_index),
    }
    return model_fn_lib.EstimatorSpec(
        mode=mode,
        predictions={
            KMeansClustering.ALL_DISTANCES: all_distances[0],
            KMeansClustering.CLUSTER_INDEX: cluster_index,
        },
        loss=loss,
        train_op=training_op,
        eval_metric_ops={KMeansClustering.SCORE: metrics.mean(loss)},
        training_hooks=training_hooks,
        export_outputs=export_outputs)