def _sample_kmc2_chain():
    """Returns previous centers as well as a new center sampled using k-MC2.

    Reads `i`, `first_shard` and `self` from the enclosing scope.

    Returns:
      The concatenation along axis 0 of `self._cluster_centers` and the
      newly sampled center, reshaped to a single row.
    """
    # Extract the subset from the underlying batch.
    start = i * self._kmc2_chain_length
    end = start + self._kmc2_chain_length
    subset = first_shard[start:end]
    # Compute the distances from points in the subset to previous centers.
    _, distances = gen_clustering_ops.nearest_neighbors(
        subset, self._cluster_centers, 1)
    # Only one neighbor is requested, so drop just the trailing axis. An
    # axis-less squeeze would also remove the point axis when the subset
    # holds a single point, producing a scalar where the chain sampler is
    # given one distance per candidate point.
    chain_distances = array_ops.squeeze(distances, [-1])
    # Sample index of new center using k-MC2 Markov chain.
    new_center_index = gen_clustering_ops.kmc2_chain_initialization(
        chain_distances, self._random_seed)
    # Extract actual new center as a single-row matrix.
    newly_sampled_center = array_ops.reshape(
        subset[new_center_index], [1, -1])
    # Under cosine distance the new center is l2-normalized before it is
    # stored.
    if self._distance_metric == COSINE_DISTANCE:
        newly_sampled_center = nn_impl.l2_normalize(
            newly_sampled_center, dim=1)
    # Return concatenation with previously sampled centers.
    return array_ops.concat(
        [self._cluster_centers, newly_sampled_center], 0)
示例#2
0
 def _sample_kmc2_chain():
   """Returns previous centers as well as a new center sampled using k-MC2.

   Reads `i`, `first_shard` and `self` from the enclosing scope.

   Returns:
     The concatenation along axis 0 of `self._cluster_centers` and the
     newly sampled center, reshaped to a single row.
   """
   # Extract the subset from the underlying batch.
   start = i * self._kmc2_chain_length
   end = start + self._kmc2_chain_length
   subset = first_shard[start:end]
   # Compute the distances from points in the subset to previous centers.
   _, distances = gen_clustering_ops.nearest_neighbors(
       subset, self._cluster_centers, 1)
   # Only one neighbor is requested, so drop just the trailing axis. An
   # axis-less squeeze would also remove the point axis when the subset
   # holds a single point, producing a scalar where the chain sampler is
   # given one distance per candidate point.
   chain_distances = array_ops.squeeze(distances, [-1])
   # Sample index of new center using k-MC2 Markov chain.
   new_center_index = gen_clustering_ops.kmc2_chain_initialization(
       chain_distances, self._random_seed)
   # Extract actual new center as a single-row matrix.
   newly_sampled_center = array_ops.reshape(subset[new_center_index],
                                            [1, -1])
   # Under cosine distance the new center is l2-normalized before it is
   # stored.
   if self._distance_metric == COSINE_DISTANCE:
     newly_sampled_center = nn_impl.l2_normalize(
         newly_sampled_center, dim=1)
   # Return concatenation with previously sampled centers.
   return array_ops.concat([self._cluster_centers, newly_sampled_center],
                           0)
示例#3
0
    def _infer_graph(self, inputs, clusters):
        """Builds the ops assigning every input to its closest cluster center.

        Args:
          inputs: list of input Tensors.
          clusters: Tensor of cluster centers.

        Returns:
          The result of `zip(*triples)`, where one
          (all_scores, score, cluster_idx) triple is built per input. It
          therefore yields three groups, each with one entry per input:
          all_scores: distance of each input to each cluster center.
          score: distance of each input to closest cluster center.
          cluster_idx: index of cluster center closest to the corresponding
            input.
        """
        assert isinstance(inputs, list)
        metric = self._distance_metric
        # Pairwise distances are used only by transform(). In all other
        # cases, this sub-graph is not evaluated.
        pairwise_scores = self._distance_graph(inputs, clusters, metric)
        use_cosine = metric == COSINE_DISTANCE
        if use_cosine:
            if not self._clusters_l2_normalized():
                # For l2-normalized vectors the squared Euclidean distance
                # is twice the cosine distance, so the nearest_neighbors op
                # is reused below and its distances are halved.
                # TODO(ands): Support COSINE distance in nearest_neighbors
                # and remove this
                # (https://github.com/imdone/tensorflow/issues/696).
                with ops.colocate_with(clusters, ignore_existing=True):
                    clusters = nn_impl.l2_normalize(clusters, dim=1)
        triples = []
        for shard, shard_scores in zip(inputs, pairwise_scores):
            with ops.colocate_with(shard, ignore_existing=True):
                nearest = gen_clustering_ops.nearest_neighbors(
                    shard, clusters, 1)
                (closest_idx, closest_dist) = nearest
                if use_cosine:
                    closest_dist *= 0.5
                # Only one neighbor was requested; drop that trailing axis.
                best_score = array_ops.squeeze(closest_dist, [-1])
                best_index = array_ops.squeeze(closest_idx, [-1])
                triples.append((shard_scores, best_score, best_index))
        return zip(*triples)
示例#4
0
  def _infer_graph(self, inputs, clusters):
    """Maps input to closest cluster and the score.

    Args:
      inputs: list of input Tensors.
      clusters: Tensor of cluster centers.

    Returns:
      The result of `zip(*output)`, where `output` holds one
      (all_scores, score, cluster_idx) tuple per input. It therefore yields
      three groups, each with one entry per input:
      all_scores: distance of each input to each cluster center.
      score: distance of each input to closest cluster center.
      cluster_idx: index of cluster center closest to the corresponding input.
    """
    assert isinstance(inputs, list)
    # Pairwise distances are used only by transform(). In all other cases, this
    # sub-graph is not evaluated.
    scores = self._distance_graph(inputs, clusters, self._distance_metric)
    output = []
    if (self._distance_metric == COSINE_DISTANCE and
        not self._clusters_l2_normalized()):
      # For l2-normalized vectors x and y the squared Euclidean distance is
      # twice the cosine distance. We are using this fact and reusing the
      # nearest_neighbors op, halving its distances below.
      # TODO(ands): Support COSINE distance in nearest_neighbors and remove
      # this.
      with ops.colocate_with(clusters):
        clusters = nn_impl.l2_normalize(clusters, dim=1)
    for inp, score in zip(inputs, scores):
      with ops.colocate_with(inp):
        (indices,
         distances) = gen_clustering_ops.nearest_neighbors(inp, clusters, 1)
        if self._distance_metric == COSINE_DISTANCE:
          distances *= 0.5
        # Squeeze only the trailing (single-neighbor) axis. An axis-less
        # squeeze would also drop the row axis for an input holding a single
        # point, turning the per-point outputs into scalars.
        output.append((score, array_ops.squeeze(distances, [-1]),
                       array_ops.squeeze(indices, [-1])))
    return zip(*output)