Example #1
def recall_at_k(labels, predictions, k):
    '''
    Compute recall at position k.

    :param labels: shape=(num_examples,), dtype=tf.int64
    :param predictions: logits of shape=(num_examples, num_classes)
    :param k: recall position
    :return: recall at position k


    Example:

    labels = tf.constant([0, 1, 1], dtype=tf.int64)
    predictions = tf.constant([[0.1, 0.2, 0.3], [3, 5, 2], [0.3, 0.4, 0.7]])
    recall_at_k(labels, predictions, 2)
    # recall_at_k(labels, predictions, 2) = 0.6667

    '''
    labels = array_ops.expand_dims(labels, axis=1)
    _, predictions_idx = nn.top_k(predictions, k)
    predictions_idx = math_ops.to_int64(predictions_idx)
    tp = sets.set_size(sets.set_intersection(predictions_idx, labels))
    tp = math_ops.to_double(tp)
    tp = math_ops.reduce_sum(tp)
    fn = sets.set_size(
        sets.set_difference(predictions_idx, labels, aminusb=False))
    fn = math_ops.to_double(fn)
    fn = math_ops.reduce_sum(fn)
    recall = math_ops.div(tp, math_ops.add(tp, fn), name='recall_at_k')

    return recall
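A minimal driver for the docstring example above; this is a sketch that assumes TF 1.x graph mode and that `recall_at_k` lives in a module with the usual `tensorflow.python.ops` imports (`array_ops`, `math_ops`, `nn`, `sets`).

# Hypothetical check of the recall_at_k snippet above (TF 1.x assumed).
import tensorflow as tf

labels = tf.constant([0, 1, 1], dtype=tf.int64)
predictions = tf.constant([[0.1, 0.2, 0.3], [3.0, 5.0, 2.0], [0.3, 0.4, 0.7]])
recall = recall_at_k(labels, predictions, 2)

with tf.Session() as sess:
    # Two of the three labels appear in their row's top-2 predictions,
    # so the expected value is 2 / 3 ~= 0.6667.
    print(sess.run(recall))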
Example #2
 def _compute_accuracy(logits, targets, weights=None):
   if self._n_classes > 2:
     _, predictions = nn.top_k(logits, 1)
   else:
     predictions = array_ops.reshape(logits, [-1])
     predictions = math_ops.greater(predictions,
                                    array_ops.zeros_like(predictions))
     targets = array_ops.reshape(targets, [-1])
   return metrics_lib.streaming_accuracy(
       math_ops.to_int32(predictions), math_ops.to_int32(targets), weights)
Example #3
def sparsemax(logits, axis=1, number_dim=2, name=None):
    """Computes sparsemax activations [1].
  For each batch `i` and class `j` we have
    $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$
  [1]: https://arxiv.org/abs/1602.02068
  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    axis: Unused in the body shown.
    number_dim: Unused in the body shown.
    name: A name for the operation (optional).
  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

    with ops.name_scope(name, "sparsemax", [logits]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        obs = array_ops.shape(logits)[0]
        dims = array_ops.shape(logits)[2]
        z = logits - math_ops.reduce_mean(logits, axis=-1)[:,
                                                           array_ops.newaxis]

        # sort z
        z_sorted, _ = nn.top_k(z, k=dims)

        # calculate k(z)
        z_cumsum = math_ops.cumsum(z_sorted, axis=-1)
        k = math_ops.range(1,
                           math_ops.cast(dims, logits.dtype) + 1,
                           dtype=logits.dtype)
        z_check = 1 + k * z_sorted > z_cumsum
        # Because the z_check vector is always [1,1,...1,0,0,...0], finding the
        # (index + 1) of the last `1` is the same as just summing the number of 1s.
        k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32),
                                  axis=-1)

        # calculate tau(z)
        mesh = meshgrid(math_ops.range(0, obs))
        indices = array_ops.stack([mesh, k_z - 1], axis=-1)
        tau_sum = array_ops.gather_nd(z_cumsum, indices)
        tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

        # calculate p
        sparsemax = math_ops.maximum(math_ops.cast(0, logits.dtype),
                                     z - tau_z[:, array_ops.newaxis])
        #    sparsemax = transpose(sparsemax,perm=permut)
        return sparsemax
Example #4
def tf_spmax(logits, name=None):
    """Computes sparsemax activations [1].
  For each batch `i` and class `j` we have
    $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$
  [1]: https://arxiv.org/abs/1602.02068
  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    name: A name for the operation (optional).
  Returns:
    A tuple of two `Tensor`s: the sparsemax probabilities (`spmax_policy`) and
    a per-row scalar (`spmax`) derived from the sorted logits and the
    threshold tau(z).
  """

    with ops.name_scope(name, "sparsemax", [logits]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        obs = array_ops.shape(logits)[0]
        dims = array_ops.shape(logits)[1]

        z = logits  #- math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]

        # sort z
        z_sorted, _ = nn.top_k(z, k=dims)

        # calculate k(z)
        z_cumsum = math_ops.cumsum(z_sorted, axis=1)
        k = math_ops.range(1,
                           math_ops.cast(dims, logits.dtype) + 1,
                           dtype=logits.dtype)
        z_check = 1 + k * z_sorted > z_cumsum
        # Because the z_check vector is always [1,1,...1,0,0,...0], finding the
        # (index + 1) of the last `1` is the same as just summing the number of 1s.
        k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

        # calculate tau(z)
        indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1)
        tau_sum = array_ops.gather_nd(z_cumsum, indices)
        tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

        spmax_policy = math_ops.maximum(math_ops.cast(0, logits.dtype),
                                        z - tau_z[:, array_ops.newaxis])

        z_square = math_ops.square(z_sorted)
        tau_square = math_ops.square(tau_z)
        # Cast in logits.dtype so this also works for half/float64 inputs.
        spmax = 0.5 * (math_ops.reduce_sum(
            math_ops.cast(z_check, logits.dtype) * z_square, axis=1) -
                       math_ops.cast(k_z, logits.dtype) * tau_square) + 0.5

        return spmax_policy, spmax
Example #5
def sparsemax(logits, name=None):
  """Computes sparsemax activations [1].

  For each batch `i` and class `j` we have
    sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)

  [1]: https://arxiv.org/abs/1602.02068

  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

  with ops.name_scope(name, "sparsemax", [logits]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    obs = array_ops.shape(logits)[0]
    dims = array_ops.shape(logits)[1]

    z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]

    # sort z
    z_sorted, _ = nn.top_k(z, k=dims)

    # calculate k(z)
    z_cumsum = math_ops.cumsum(z_sorted, axis=1)
    k = math_ops.range(
        1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype
    )
    z_check = 1 + k * z_sorted > z_cumsum
    # Because the z_check vector is always [1,1,...1,0,0,...0], finding the
    # (index + 1) of the last `1` is the same as just summing the number of 1s.
    k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

    # calculate tau(z)
    indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1)
    tau_sum = array_ops.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

    # calculate p
    return math_ops.maximum(
        math_ops.cast(0, logits.dtype),
        z - tau_z[:, array_ops.newaxis]
    )
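A quick sanity check on the function above (a sketch, assuming TF 1.x graph mode and the same `tensorflow.python` imports as the surrounding code): for a single row `[1.0, 0.0, -1.0]` the mean is 0, only the largest entry passes the support check (k(z) = 1, tau(z) = 0), so the result should be `[1., 0., 0.]`.

# Hypothetical check of the sparsemax defined above (TF 1.x assumed).
import tensorflow as tf

logits = tf.constant([[1.0, 0.0, -1.0]])
p = sparsemax(logits)

with tf.Session() as sess:
    # All mass goes to the largest logit here: expected [[1., 0., 0.]].
    print(sess.run(p))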
Example #6
def _sparsemax(logits, name=None):
    """Computes sparsemax activations [1].

    For each batch `i` and class `j` we have
    sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)

    [1]: https://arxiv.org/abs/1602.02068

    :param logits: A `Tensor` of logits.
    :param name: A name for the operation (optional).

    Returns:
    A `Tensor`. Has the same type as `logits`.
    """

    with ops.name_scope(name, "sparsemax", [logits]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        obs = logits.shape[0]
        dims = logits.shape[1]

        z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]

        # sort z
        z_sorted, _ = nn.top_k(z, k=dims)

        # calculate k(z)
        z_cumsum = math_ops.cumsum(z_sorted, axis=1)
        k = math_ops.range(
            1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
        z_check = 1 + k * z_sorted > z_cumsum
        # Because the z_check vector is always [1,1,...1,0,0,...0], finding the
        # (index + 1) of the last `1` is the same as just summing the number of 1s.
        k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

        # calculate tau(z)
        indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1)
        tau_sum = array_ops.gather_nd(z_cumsum, indices)
        tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

        # calculate p
        return math_ops.maximum(
            math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
Example #7
        def prop_raw(x):
            obs = array_ops.shape(x)[0]
            dim = array_ops.shape(x)[1]

            z = x - math_ops.reduce_mean(x, axis=1)[:, array_ops.newaxis]

            z_sorted, _ = nn.top_k(z, k=dim)

            z_cumsum = math_ops.cumsum(z_sorted, axis=1)
            k = math_ops.range(1,
                               math_ops.cast(dim, x.dtype) + 1,
                               dtype=x.dtype)
            z_check = 1 + k * z_sorted > z_cumsum

            k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32),
                                      axis=1)

            indices = array_ops.stack([math_ops.range(0, obs), k_z - 1],
                                      axis=1)
            tau_sum = array_ops.gather_nd(z_cumsum, indices)
            tau_z = (tau_sum - 1) / math_ops.cast(k_z, x.dtype)

            return math_ops.maximum(math_ops.cast(0, x.dtype),
                                    z - tau_z[:, array_ops.newaxis])
Example #8
 def loop_fn(i):
   x_i = array_ops.gather(x, i)
   return nn.top_k(x_i)
Example #9
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents whether this is training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sampled target classes. Needs to be tuned; a good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
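For reference, a hypothetical `params` dict matching the keys documented in the docstring above. The values and the single real-valued feature column are illustrative placeholders only, and `layers` is assumed to be `tf.contrib.layers` as in the snippet.

# Illustrative hyperparameters for dnn_sampled_softmax_classifier_model_fn.
# The feature column and all numbers below are made-up placeholders.
params = {
    "hidden_units": [64, 32],
    "feature_columns": [layers.real_valued_column("x", dimension=128)],
    "n_classes": 10000,
    "n_samples": 200,          # roughly 2% of n_classes, per the docstring
    "n_labels": 5,
    "top_k": 10,
    "optimizer": None,         # falls back to Adagrad, per the docstring
    "dropout": 0.5,
    "gradient_clip_norm": 5.0,
    "num_ps_replicas": 0,
}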
Example #10

import tensorflow as tf
import tensorflow.contrib as tfc
from tensorflow.python.ops import nn


#tf.enable_eager_execution()

with tf.Session() as sess:
    labels = tf.constant(value=[1, 0, 2], dtype=tf.int64)
    probs = tf.constant(value=[[0.8, 0.93, 0.2, 0.1],
                               [0.82, 0.0, 0.1, 0.83],
                               [0.92, 0.1, 0.90, 0.3]])
    _, ix = nn.top_k(probs, k=1)
    c = tf.metrics.recall_at_k(predictions=probs, labels=labels, k=1)
    d = tfc.metrics.streaming_sparse_recall_at_k(
        predictions=probs, labels=labels, k=1)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    print(sess.run(ix))
    print(sess.run(c))
    print(sess.run(d))
    print('done')
Example #11
  def create_batch(self):
    """Create queues to window and batch time series data.

    Returns:
      A dictionary of Tensors corresponding to the output of `self._reader`
      (from the `time_series_reader` constructor argument), each with shapes
      prefixed by [`batch_size`, `window_size`].
    """
    features = self._reader.read()
    if self._jitter:
      # TODO(agarwal, allenl): Figure out if more jitter is needed here.
      jitter = random_ops.random_uniform(shape=[], maxval=2, dtype=dtypes.int32)
    else:
      jitter = 0
    # To keep things efficient, we pass from the windowing batcher to the
    # batch-of-windows batcher in batches. This avoids the need for huge numbers
    # of threads, but does mean that jitter is only applied occasionally.
    # TODO(allenl): Experiment with different internal passing sizes.
    internal_passing_size = self._batch_size
    features_windowed = input_lib.batch(
        features,
        batch_size=self._window_size * internal_passing_size + jitter,
        enqueue_many=True,
        capacity=(self._queue_capacity_multiplier
                  * internal_passing_size * self._window_size),
        num_threads=self._num_threads)
    raw_features_windowed = features_windowed
    if self._jitter:
      features_windowed = {
          key: value[jitter:]
          for key, value in features_windowed.items()}
    features_windowed = {
        key: array_ops.reshape(
            value,
            array_ops.concat(
                [[internal_passing_size, self._window_size],
                 array_ops.shape(value)[1:]],
                axis=0))
        for key, value in features_windowed.items()}
    batch_and_window_shape = tensor_shape.TensorShape(
        [internal_passing_size, self._window_size])
    for key in features_windowed.keys():
      features_windowed[key].set_shape(
          batch_and_window_shape.concatenate(
              raw_features_windowed[key].get_shape()[1:]))
    # When switching files, we may end up with windows where the time is not
    # decreasing, even if times within each file are sorted (and even if those
    # files are visited in order, when looping back around to the beginning of
    # the first file). This is hard for models to deal with, so we either
    # discard such examples, creating a bias where the beginning and end of the
    # series is under-sampled, or we sort the window, creating large gaps.
    times = features_windowed[feature_keys.TrainEvalFeatures.TIMES]
    if self._discard_out_of_order:
      non_decreasing = math_ops.reduce_all(
          times[:, 1:] >= times[:, :-1], axis=1)
      # Ensure that no more than self._discard_limit complete batches are
      # discarded contiguously (resetting the count when we find a single clean
      # window). This prevents infinite looping when the dataset is smaller than
      # the window size.
      # TODO(allenl): Figure out a way to return informative errors from
      # count_up_to.
      discarded_windows_limiter = variable_scope.variable(
          initial_value=constant_op.constant(0, dtype=dtypes.int64),
          name="discarded_windows_limiter",
          trainable=False,
          collections=[ops.GraphKeys.LOCAL_VARIABLES])
      def _initialized_limit_check():
        return control_flow_ops.cond(
            math_ops.reduce_any(non_decreasing),
            lambda: state_ops.assign(discarded_windows_limiter, 0),
            lambda: discarded_windows_limiter.count_up_to(self._discard_limit))
      discard_limit_op = control_flow_ops.cond(
          state_ops.is_variable_initialized(discarded_windows_limiter),
          _initialized_limit_check,
          lambda: constant_op.constant(0, dtype=dtypes.int64))
      with ops.control_dependencies([discard_limit_op]):
        non_decreasing = array_ops.identity(non_decreasing)
    else:
      _, indices_descending = nn.top_k(
          times, k=array_ops.shape(times)[-1], sorted=True)
      indices = array_ops.reverse(indices_descending, axis=[0])
      features_windowed = {
          key: array_ops.gather(params=value, indices=indices)
          for key, value in features_windowed.items()
      }
      non_decreasing = True
    features_batched = input_lib.maybe_shuffle_batch(
        features_windowed,
        num_threads=self._num_threads,
        seed=self._shuffle_seed,
        batch_size=self._batch_size,
        capacity=self._queue_capacity_multiplier * self._batch_size,
        min_after_dequeue=(self._shuffle_min_after_dequeue_multiplier *
                           self._batch_size),
        keep_input=non_decreasing,
        enqueue_many=True)
    return (features_batched, None)
Example #13
def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
                   resampling_temperature, partition_strategy):
    """A helper function for rank_sampled_softmax_loss.

  This computes, for each i in `sampled_values`,

      log(sum_j exp((w_i * x_j + b_i) / resampling_temperature))

  where w_i, b_i are the weight and bias of the i-th class, respectively,
  and j ranges over the rows of `inputs`. For efficiency, we rearrange the
  computation to

      log(sum_j exp(w_i * (x_j / resampling_temperature))) +
          b_i / resampling_temperature.

  This translates to the following batched computation using tensorflow ops:

      reduce_logsumexp(matmul(embeddings,
                       transpose(inputs / resampling_temperature))) +
          biases / resampling_temperature

  The computation of the first term is colocated with the embeddings using
  `transform_fn` in `embedding_ops._embedding_lookup_and_transform`. The second
  term, not the bottleneck, is computed at the worker.

  Args:
    weights: From `rank_sampled_softmax_loss`.
    biases: From `rank_sampled_softmax_loss`.
    inputs: From `rank_sampled_softmax_loss`.
    sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
    num_resampled: An `int`. This many values are selected from
        `sampled_values` using the adaptive resampling algorithm. The caller
        must ensure that `num_resampled` is less than the size of
        `sampled_values`.
    resampling_temperature: A scalar `Tensor` with the temperature parameter
        for the adaptive resampling algorithm.
    partition_strategy: From `rank_sampled_softmax_loss`.

  Returns:
    A tuple of (`resampled_candidates`, `true_expected_count`,
        `resampled_expected_count`), similar to `sampled_values` but sampled
        down to `num_resampled` values.
  """
    # This code supports passing a Tensor for num_resampled, but since it is only
    # called with an int, that's what we specify in the arg list. If this
    # function is ever externalized, we should change the doc to support Tensor.

    sampled, true_expected_count, sampled_expected_count = sampled_values

    sampled = math_ops.cast(array_ops.stop_gradient(sampled), dtypes.int64)
    true_expected_count = array_ops.stop_gradient(true_expected_count)
    sampled_expected_count = array_ops.stop_gradient(sampled_expected_count)

    reweighted_inputs = inputs / resampling_temperature

    def logsumexp_logit(embeddings):
        return math_ops.reduce_logsumexp(math_ops.matmul(embeddings,
                                                         reweighted_inputs,
                                                         transpose_b=True),
                                         axis=1,
                                         keepdims=False)

    # Calling this protected form of embedding_lookup allows co-locating
    # the logsumexp computation with the partitioned weights, which yields
    # a large speedup in practice.
    sampled_logits = embedding_ops._embedding_lookup_and_transform(  # pylint: disable=protected-access
        weights,
        sampled,
        partition_strategy,
        transform_fn=logsumexp_logit)
    sampled_b = array_ops.reshape(
        embedding_ops.embedding_lookup(biases, sampled, partition_strategy),
        [-1])
    sampled_logits += sampled_b / resampling_temperature

    _, resampled_indices = nn.top_k(sampled_logits,
                                    k=num_resampled,
                                    sorted=False)
    resampled = array_ops.gather(sampled, indices=resampled_indices)
    resampled_expected_count = array_ops.gather(sampled_expected_count,
                                                indices=resampled_indices)

    return resampled, true_expected_count, resampled_expected_count
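A rough sketch of how `_rank_resample` might be exercised on dummy data. The shapes, the log-uniform candidate sampler, the temperature, and the "div" partition strategy are assumptions for illustration, not taken from the original call site.

# Hypothetical driver for _rank_resample (TF 1.x assumed); values are dummies.
import tensorflow as tf

num_classes, dim, batch, n_sampled, n_resampled = 1000, 16, 8, 64, 16
weights = tf.get_variable("w", [num_classes, dim])
biases = tf.get_variable("b", [num_classes])
inputs = tf.random_normal([batch, dim])
true_classes = tf.zeros([batch, 1], dtype=tf.int64)

sampled_values = tf.nn.log_uniform_candidate_sampler(
    true_classes=true_classes, num_true=1, num_sampled=n_sampled,
    unique=True, range_max=num_classes)

resampled, true_exp, resampled_exp = _rank_resample(
    weights, biases, inputs, sampled_values,
    num_resampled=n_resampled, resampling_temperature=1.0,
    partition_strategy="div")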
Example #14
 def model(a):
     values, indices = nn.top_k(a, topn)
     return indices
Example #15
    def create_batch(self):
        """Create queues to window and batch time series data.

    Returns:
      A dictionary of Tensors corresponding to the output of `self._reader`
      (from the `time_series_reader` constructor argument), each with shapes
      prefixed by [`batch_size`, `window_size`].
    """
        features = self._reader.read()
        if self._jitter:
            # TODO(agarwal, allenl): Figure out if more jitter is needed here.
            jitter = random_ops.random_uniform(shape=[],
                                               maxval=2,
                                               dtype=dtypes.int32)
        else:
            jitter = 0
        # To keep things efficient, we pass from the windowing batcher to the
        # batch-of-windows batcher in batches. This avoids the need for huge numbers
        # of threads, but does mean that jitter is only applied occasionally.
        # TODO(allenl): Experiment with different internal passing sizes.
        internal_passing_size = self._batch_size
        features_windowed = input_lib.batch(
            features,
            batch_size=self._window_size * internal_passing_size + jitter,
            enqueue_many=True,
            capacity=(self._queue_capacity_multiplier * internal_passing_size *
                      self._window_size),
            num_threads=self._num_threads)
        raw_features_windowed = features_windowed
        if self._jitter:
            features_windowed = {
                key: value[jitter:]
                for key, value in features_windowed.items()
            }
        features_windowed = {
            key: array_ops.reshape(
                value,
                array_ops.concat([[internal_passing_size, self._window_size],
                                  array_ops.shape(value)[1:]],
                                 axis=0))
            for key, value in features_windowed.items()
        }
        batch_and_window_shape = tensor_shape.TensorShape(
            [internal_passing_size, self._window_size])
        for key in features_windowed.keys():
            features_windowed[key].set_shape(
                batch_and_window_shape.concatenate(
                    raw_features_windowed[key].get_shape()[1:]))
        # When switching files, we may end up with windows where the time is not
        # decreasing, even if times within each file are sorted (and even if those
        # files are visited in order, when looping back around to the beginning of
        # the first file). This is hard for models to deal with, so we either
        # discard such examples, creating a bias where the beginning and end of the
        # series is under-sampled, or we sort the window, creating large gaps.
        times = features_windowed[feature_keys.TrainEvalFeatures.TIMES]
        if self._discard_out_of_order:
            non_decreasing = math_ops.reduce_all(times[:, 1:] >= times[:, :-1],
                                                 axis=1)
            # Ensure that no more than self._discard_limit complete batches are
            # discarded contiguously (resetting the count when we find a single clean
            # window). This prevents infinite looping when the dataset is smaller than
            # the window size.
            # TODO(allenl): Figure out a way to return informative errors from
            # count_up_to.
            discarded_windows_limiter = variable_scope.variable(
                initial_value=constant_op.constant(0, dtype=dtypes.int64),
                name="discarded_windows_limiter",
                trainable=False,
                collections=[ops.GraphKeys.LOCAL_VARIABLES])

            def _initialized_limit_check():
                return control_flow_ops.cond(
                    math_ops.reduce_any(non_decreasing),
                    lambda: state_ops.assign(discarded_windows_limiter, 0),
                    lambda: discarded_windows_limiter.count_up_to(
                        self._discard_limit))

            discard_limit_op = control_flow_ops.cond(
                state_ops.is_variable_initialized(discarded_windows_limiter),
                _initialized_limit_check,
                lambda: constant_op.constant(0, dtype=dtypes.int64))
            with ops.control_dependencies([discard_limit_op]):
                non_decreasing = array_ops.identity(non_decreasing)
        else:
            _, indices_descending = nn.top_k(times,
                                             k=array_ops.shape(times)[-1],
                                             sorted=True)
            indices = array_ops.reverse(indices_descending, axis=[0])
            features_windowed = {
                key: array_ops.gather(params=value, indices=indices)
                for key, value in features_windowed.items()
            }
            non_decreasing = True
        features_batched = input_lib.maybe_shuffle_batch(
            features_windowed,
            num_threads=self._num_threads,
            seed=self._shuffle_seed,
            batch_size=self._batch_size,
            capacity=self._queue_capacity_multiplier * self._batch_size,
            min_after_dequeue=(self._shuffle_min_after_dequeue_multiplier *
                               self._batch_size),
            keep_input=non_decreasing,
            enqueue_many=True)
        return (features_batched, None)
Example #16
 def model(a):
     _, indices = nn.top_k(a, topn)
     return indices
Example #17
def sparsemax(logits, name=None):
    """Computes sparsemax activations [1].

  For each batch `i` and class `j` we have
    $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$

  [1]: https://arxiv.org/abs/1602.02068

  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

    with ops.name_scope(name, "sparsemax", [logits]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        obs = array_ops.shape(logits)[0]
        dims = array_ops.shape(logits)[1]

        # In the paper, they call the logits z.
        # The mean(logits) can be subtracted from logits to make the algorithm
        # more numerically stable. The instability in this algorithm comes mostly
        # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close
        # to zero. However, in practice the numerical instability issues are very
        # minor and subtracting the mean causes extra issues with inf and nan
        # input.
        z = logits

        # sort z
        z_sorted, _ = nn.top_k(z, k=dims)

        # calculate k(z)
        z_cumsum = math_ops.cumsum(z_sorted, axis=1)
        k = math_ops.range(1,
                           math_ops.cast(dims, logits.dtype) + 1,
                           dtype=logits.dtype)
        z_check = 1 + k * z_sorted > z_cumsum
        # Because the z_check vector is always [1,1,...1,0,0,...0], finding the
        # (index + 1) of the last `1` is the same as just summing the number of 1s.
        k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

        # calculate tau(z)
        # If there are inf values or all values are -inf, the k_z will be zero,
        # this is mathematically invalid and will also cause the gather_nd to fail.
        # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then
        # fixed later (see p_safe) by returning p = nan. This results in the same
        # behavior as softmax.
        k_z_safe = math_ops.maximum(k_z, 1)
        indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1],
                                  axis=1)
        tau_sum = array_ops.gather_nd(z_cumsum, indices)
        tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

        # calculate p
        p = math_ops.maximum(math_ops.cast(0, logits.dtype),
                             z - tau_z[:, array_ops.newaxis])
        # If k_z = 0 or if z = nan, then the input is invalid
        p_safe = array_ops.where(
            math_ops.logical_or(math_ops.equal(k_z, 0),
                                math_ops.is_nan(z_cumsum[:, -1])),
            array_ops.fill([obs, dims],
                           math_ops.cast(float("nan"), logits.dtype)), p)

        return p_safe
Example #18
 def model(a):
     return nn.top_k(a, k=10, sorted=True)
Example #19
 def model(a, k):
     return nn.top_k(a, k=k, sorted=True)
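Since every example on this page revolves around `nn.top_k`, here is a tiny worked call for reference; the input values are made up and TF 1.x graph mode is assumed.

# nn.top_k returns the k largest entries and their indices, in descending order.
import tensorflow as tf

values, indices = tf.nn.top_k(tf.constant([1.0, 4.0, 2.0, 3.0]), k=2, sorted=True)
with tf.Session() as sess:
    print(sess.run(values))   # [4. 3.]
    print(sess.run(indices))  # [1 3]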
Example #20
def sparsemax(logits, name=None):
  """Computes sparsemax activations [1].

  For each batch `i` and class `j` we have
    $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$

  [1]: https://arxiv.org/abs/1602.02068

  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

  with ops.name_scope(name, "sparsemax", [logits]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    obs = array_ops.shape(logits)[0]
    dims = array_ops.shape(logits)[1]

    # In the paper, they call the logits z.
    # The mean(logits) can be subtracted from logits to make the algorithm
    # more numerically stable. The instability in this algorithm comes mostly
    # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close
    # to zero. However, in practice the numerical instability issues are very
    # minor and subtracting the mean causes extra issues with inf and nan
    # input.
    z = logits

    # sort z
    z_sorted, _ = nn.top_k(z, k=dims)

    # calculate k(z)
    z_cumsum = math_ops.cumsum(z_sorted, axis=1)
    k = math_ops.range(
        1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
    z_check = 1 + k * z_sorted > z_cumsum
    # Because the z_check vector is always [1,1,...1,0,0,...0], finding the
    # (index + 1) of the last `1` is the same as just summing the number of 1s.
    k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

    # calculate tau(z)
    # If there are inf values or all values are -inf, the k_z will be zero,
    # this is mathematically invalid and will also cause the gather_nd to fail.
    # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then
    # fixed later (see p_safe) by returning p = nan. This results in the same
    # behavior as softmax.
    k_z_safe = math_ops.maximum(k_z, 1)
    indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1)
    tau_sum = array_ops.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

    # calculate p
    p = math_ops.maximum(
        math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
    # If k_z = 0 or if z = nan, then the input is invalid
    p_safe = array_ops.where(
        math_ops.logical_or(
            math_ops.equal(k_z, 0), math_ops.is_nan(z_cumsum[:, -1])),
        array_ops.fill([obs, dims], math_ops.cast(float("nan"), logits.dtype)),
        p)

    return p_safe
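A small comparison sketch, assuming TF 1.x and the same internal imports as the surrounding code: for a peaked row of logits this `sparsemax` should put all mass on one class, whereas softmax keeps every class strictly positive.

# Hypothetical comparison of the sparsemax above with softmax (TF 1.x assumed).
import tensorflow as tf

logits = tf.constant([[3.0, 1.0, 0.2]])
with tf.Session() as sess:
    # For these logits, tau(z) = 2, so sparsemax yields [[1., 0., 0.]],
    # while softmax assigns nonzero probability to every class.
    print(sess.run(sparsemax(logits)))
    print(sess.run(tf.nn.softmax(logits)))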
Example #21
def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
                   resampling_temperature, partition_strategy):
  """A helper function for rank_sampled_softmax_loss.

  This computes, for each i in `sampled_values`,

      log(sum_j exp((w_i * x_j + b_i) / resampling_temperature))

  where w_i, b_i are the weight and bias of the i-th class, respectively,
  and j ranges over the rows of `inputs`. For efficiency, we rearrange the
  computation to

      log(sum_j exp(w_i * (x_j / resampling_temperature))) +
          b_i / resampling_temperature.

  This translates to the following batched computation using tensorflow ops:

      reduce_logsumexp(matmul(embeddings,
                       transpose(inputs / resampling_temperature))) +
          biases / resampling_temperature

  The computation of the first term is colocated with the embeddings using
  `transform_fn` in `embedding_ops._embedding_lookup_and_transform`. The second
  term, not the bottleneck, is computed at the worker.

  Args:
    weights: From `rank_sampled_softmax_loss`.
    biases: From `rank_sampled_softmax_loss`.
    inputs: From `rank_sampled_softmax_loss`.
    sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
    num_resampled: An `int`. This many values are selected from
        `sampled_values` using the adaptive resampling algorithm. The caller
        must ensure that `num_resampled` is less than the size of
        `sampled_values`.
    resampling_temperature: A scalar `Tensor` with the temperature parameter
        for the adaptive resampling algorithm.
    partition_strategy: From `rank_sampled_softmax_loss`.

  Returns:
    A tuple of (`resampled_candidates`, `true_expected_count`,
        `resampled_expected_count`), similar to `sampled_values` but sampled
        down to `num_resampled` values.
  """
  # This code supports passing a Tensor for num_resampled, but since it is only
  # called with an int, that's what we specify in the arg list. If this
  # function is ever externalized, we should change the doc to support Tensor.

  sampled, true_expected_count, sampled_expected_count = sampled_values

  sampled = math_ops.cast(array_ops.stop_gradient(sampled), dtypes.int64)
  true_expected_count = array_ops.stop_gradient(true_expected_count)
  sampled_expected_count = array_ops.stop_gradient(sampled_expected_count)

  reweighted_inputs = inputs / resampling_temperature

  def logsumexp_logit(embeddings):
    return math_ops.reduce_logsumexp(
        math_ops.matmul(embeddings, reweighted_inputs, transpose_b=True),
        axis=1,
        keepdims=False)

  # Calling this protected form of embedding_lookup allows co-locating
  # the logsumexp computation with the partitioned weights, which yields
  # a large speedup in practice.
  sampled_logits = embedding_ops._embedding_lookup_and_transform(  # pylint: disable=protected-access
      weights, sampled, partition_strategy, transform_fn=logsumexp_logit)
  sampled_b = array_ops.reshape(
      embedding_ops.embedding_lookup(biases, sampled, partition_strategy), [-1])
  sampled_logits += sampled_b / resampling_temperature

  _, resampled_indices = nn.top_k(sampled_logits, k=num_resampled, sorted=False)
  resampled = array_ops.gather(sampled, indices=resampled_indices)
  resampled_expected_count = array_ops.gather(
      sampled_expected_count, indices=resampled_indices)

  return resampled, true_expected_count, resampled_expected_count