Exemplo n.º 1
0
def report_uninitialized_variables(var_list=None, name="report_uninitialized_variables"):
    """Builds an op that lists the names of uninitialized variables.

    Running the returned tensor yields a 1-D string tensor holding the op
    names of every variable in `var_list` that is not yet initialized. The
    tensor is empty when every variable is initialized or when there are no
    variables to check.

    Args:
      var_list: List of `Variable` objects to check. Defaults to the
        value of `all_variables() + local_variables()`.
      name: Optional name of the `Operation`.

    Returns:
      A 1-D tensor containing names of the uninitialized variables, or an
      empty 1-D tensor if there are no variables or no uninitialized
      variables.
    """
    if var_list is None:
        var_list = all_variables() + local_variables()
    # Backwards compatibility for old-style variables. TODO(touts): remove.
    if not var_list:
        var_list = [
            graph_op.outputs[0]
            for graph_op in ops.get_default_graph().get_operations()
            if graph_op.type in ["Variable", "AutoReloadVariable"]
        ]
    if not var_list:
        # An empty result lets callers treat "size == 0" as "model ready".
        return array_ops.constant([], dtype=dtypes.string, name=name)

    # 1-D boolean tensor: True where the corresponding variable is NOT
    # initialized.
    init_flags = [state_ops.is_variable_initialized(var) for var in var_list]
    uninitialized_mask = math_ops.logical_not(array_ops.pack(init_flags))
    # 1-D string tensor with every candidate variable name.
    all_names = array_ops.constant([var.op.name for var in var_list])
    # Keep only the names whose mask entry is True.
    return array_ops.boolean_mask(all_names, uninitialized_mask, name=name)
Exemplo n.º 2
0
  def _apply_transform(self, input_tensors, **kwargs):
    """Applies the transformation to the `transform_input`.

    Builds a `SparseTensor` from the first input tensor by dropping every
    element equal to `self.strip_value` (or every NaN element when
    `self.strip_value` is NaN).

    Args:
      input_tensors: a list of Tensors representing the input to
        the Transform. Only `input_tensors[0]` is used; its shape is
        reshaped to a scalar below, which only succeeds for a rank-1 tensor.
      **kwargs: Additional keyword arguments, unused here.

    Returns:
        A namedtuple of Tensors representing the transformed output.
    """
    d = input_tensors[0]

    strip_value = self.strip_value
    # `is np.nan` only recognizes the np.nan singleton. Other NaN objects
    # (float("nan"), np.float32("nan"), ...) would fall through to `equal`,
    # and NaN never compares equal to anything, so nothing would be stripped.
    # Treat any floating-point NaN as a request to strip NaNs.
    strip_nan = strip_value is np.nan or (
        isinstance(strip_value, (float, np.floating)) and
        bool(np.isnan(strip_value)))

    if strip_nan:
      strip_hot = math_ops.is_nan(d)
    else:
      strip_hot = math_ops.equal(d,
                                 array_ops.constant([strip_value],
                                                    dtype=d.dtype))
    keep_hot = math_ops.logical_not(strip_hot)

    # Scalar length of `d`; positions 0..length-1 that survive the mask
    # become the sparse indices.
    length = array_ops.reshape(array_ops.shape(d), [])
    indices = array_ops.boolean_mask(math_ops.range(length), keep_hot)
    values = array_ops.boolean_mask(d, keep_hot)

    # SparseTensor takes int64 indices of shape [num_values, rank] and an
    # int64 dense shape.
    sparse_indices = array_ops.reshape(
        math_ops.cast(indices, dtypes.int64), [-1, 1])
    shape = math_ops.cast(array_ops.shape(d), dtypes.int64)

    # pylint: disable=not-callable
    return self.return_type(ops.SparseTensor(sparse_indices, values, shape))
Exemplo n.º 3
0
    def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
      """Internal while_loop body.

      Runs one `decoder.step`, updates the finished flags and sequence
      lengths, and writes this step's (optionally zeroed) outputs at index
      `time` of `outputs_ta`.

      Args:
        time: scalar int32 tensor.
        outputs_ta: structure of TensorArray.
        state: (structure of) state tensors and TensorArrays.
        inputs: (structure of) input tensors.
        finished: bool tensor (keeping track of what's finished).
        sequence_lengths: int32 tensor (keeping track of time of finish).

      Returns:
        `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
          next_sequence_lengths)`.
      """
      (next_outputs, decoder_state, next_inputs,
       decoder_finished) = decoder.step(time, inputs, state)
      # Finished is sticky: an entry already finished stays finished.
      next_finished = math_ops.logical_or(decoder_finished, finished)
      if maximum_iterations is not None:
        # Force everything to finished once the next step hits the cap.
        next_finished = math_ops.logical_or(
            next_finished, time + 1 >= maximum_iterations)
      # Entries that finish on this step (not finished before, finished now)
      # get length `time + 1`; all others keep their current length.
      next_sequence_lengths = array_ops.where(
          math_ops.logical_and(math_ops.logical_not(finished), next_finished),
          array_ops.fill(array_ops.shape(sequence_lengths), time + 1),
          sequence_lengths)

      # The loop variables must keep the same nested structure across steps.
      nest.assert_same_structure(state, decoder_state)
      nest.assert_same_structure(outputs_ta, next_outputs)
      nest.assert_same_structure(inputs, next_inputs)

      # Zero out output values past finish
      if impute_finished:
        emit = nest.map_structure(
            lambda out, zero: array_ops.where(finished, zero, out),
            next_outputs,
            zero_outputs)
      else:
        emit = next_outputs

      # Copy through states past finish
      def _maybe_copy_state(new, cur):
        # TensorArrays and scalar states get passed through.
        if isinstance(cur, tensor_array_ops.TensorArray):
          pass_through = True
        else:
          # Pin the new state's static shape to the current state's shape
          # before inspecting its rank.
          new.set_shape(cur.shape)
          pass_through = (new.shape.ndims == 0)
        # For finished entries keep the current state, otherwise take the new.
        return new if pass_through else array_ops.where(finished, cur, new)

      if impute_finished:
        next_state = nest.map_structure(
            _maybe_copy_state, decoder_state, state)
      else:
        next_state = decoder_state

      # Record this step's emitted outputs at position `time`.
      outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out),
                                      outputs_ta, emit)
      return (time + 1, outputs_ta, next_state, next_inputs, next_finished,
              next_sequence_lengths)
Exemplo n.º 4
0
def report_uninitialized_resources(resource_list=None,
                                   name="report_uninitialized_resources"):
  """Returns the names of all uninitialized resources in resource_list.

  An empty result means every resource has been initialized.

  Args:
   resource_list: resources to check. If None, will use shared_resources() +
    local_resources().
   name: name for the resource-checking op.

  Returns:
   Tensor containing names of the handles of all resources which have not
   yet been initialized.
  """
  if resource_list is None:
    resource_list = shared_resources() + local_resources()
  # Build every op under the requested name scope, pinned to the CPU.
  with ops.name_scope(name), ops.device("/cpu:0"):
    if not resource_list:
      # An empty tensor lets callers treat "size == 0" as "model ready".
      return array_ops.constant([], dtype=dtypes.string)
    # 1-D boolean tensor: True where the resource is NOT initialized.
    initialized_flags = array_ops.stack(
        [res.is_initialized for res in resource_list])
    uninitialized_mask = math_ops.logical_not(initialized_flags)
    # 1-D string tensor with the handle name of every resource.
    handle_names = array_ops.constant(
        [res.handle.name for res in resource_list])
    # Keep only the names of the uninitialized resources.
    return array_ops.boolean_mask(handle_names, uninitialized_mask)
Exemplo n.º 5
0
def _mask_probs(probs, eos_token, finished):
  """Masks log probabilities.

  The result is that finished beams allocate all probability mass to eos and
  unfinished beams remain unchanged.

  Args:
    probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]`
    eos_token: An int32 id corresponding to the EOS token to allocate
      probability to.
    finished: A boolean tensor of shape `[batch_size, beam_width]` that
      specifies which elements in the beam are finished already.

  Returns:
    A tensor of shape `[batch_size, beam_width, vocab_size]`, where unfinished
    beams stay unchanged and finished beams are replaced with a tensor with all
    probability on the EOS token.
  """
  vocab_size = array_ops.shape(probs)[2]
  # All finished examples are replaced with a vector that has all
  # probability on EOS: log-prob 0 at `eos_token`, the dtype minimum elsewhere.
  finished_row = array_ops.one_hot(
      eos_token,
      vocab_size,
      dtype=probs.dtype,
      on_value=0.,
      off_value=probs.dtype.min)
  # Select between the two alternatives instead of multiplying by 0/1 masks:
  # `0 * probs` is NaN wherever `probs` is -inf, which would corrupt finished
  # beams whose log-probabilities contain -inf entries.
  finished_probs = array_ops.tile(
      array_ops.reshape(finished_row, [1, 1, -1]),
      array_ops.concat([array_ops.shape(finished), [1]], 0))
  finished_mask = array_ops.tile(
      array_ops.expand_dims(finished, 2), [1, 1, vocab_size])
  return array_ops.where(finished_mask, finished_probs, probs)
Exemplo n.º 6
0
  def apply_attention_scores(self, scores, value, value_mask=None):
    """Applies attention scores to the given value tensor.

    To use this method in your attention layer, follow the steps:

    * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape
      `[batch_size, Tv]` to calculate the attention `scores`.
    * Pass `scores` and `value` tensors to this method. The method applies
      `value_mask`, calculates `attention_distribution = softmax(scores)`, then
      returns `matmul(attention_distribution, value)`.
    * Apply `query_mask` and return the result.

    Args:
      scores: Scores float tensor of shape `[batch_size, Tq, Tv]`.
      value: Value tensor of shape `[batch_size, Tv, dim]`.
      value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`.
        If given, will apply the mask such that values at positions where
        `mask==False` do not contribute to the result.

    Returns:
      Tensor of shape `[batch_size, Tq, dim]`.
    """
    if value_mask is not None:
      # True at padding positions; the inserted axis 1 lets the mask
      # broadcast against the Tq dimension of `scores`.
      is_padding = array_ops.expand_dims(
          math_ops.logical_not(value_mask), axis=1)
      # Large negative bias so padding positions receive ~0 attention weight.
      padding_bias = 1.e9 * math_ops.cast(is_padding, dtype=K.floatx())
      scores -= padding_bias
    weights = nn.softmax(scores)
    return math_ops.matmul(weights, value)
Exemplo n.º 7
0
  def _apply_scores(self, scores, value, scores_mask=None):
    """Applies attention scores to the given value tensor.

    To use this method in your attention layer, follow the steps:

    * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape
      `[batch_size, Tv]` to calculate the attention `scores`.
    * Pass `scores` and `value` tensors to this method. The method applies
      `scores_mask`, calculates `attention_distribution = softmax(scores)`, then
      returns `matmul(attention_distribution, value)`.
    * Apply `query_mask` and return the result.

    Args:
      scores: Scores float tensor of shape `[batch_size, Tq, Tv]`.
      value: Value tensor of shape `[batch_size, Tv, dim]`.
      scores_mask: A boolean mask `Tensor` of shape `[batch_size, 1, Tv]` or
        `[batch_size, Tq, Tv]`. If given, scores at positions where
        `scores_mask==False` do not contribute to the result. It must contain
        at least one `True` value in each line along the last dimension.

    Returns:
      Tensor of shape `[batch_size, Tq, dim]`.
    """
    if scores_mask is not None:
      # True wherever a score must be ignored.
      is_padding = math_ops.logical_not(scores_mask)
      # Large negative bias so padding positions receive ~0 attention weight.
      padding_bias = 1.e9 * math_ops.cast(is_padding, dtype=K.floatx())
      scores -= padding_bias
    weights = nn.softmax(scores)
    return math_ops.matmul(weights, value)
Exemplo n.º 8
0
 def _not(self, x, use_gpu=False):
   """Checks `math_ops.logical_not(x)` against `np.logical_not(x)`.

   Asserts that the op's dtype is bool and that its evaluated values and
   shape match the NumPy reference.
   """
   expected = np.logical_not(x)
   with test_util.device(use_gpu=use_gpu):
     x_tensor = ops.convert_to_tensor(x)
     result = math_ops.logical_not(x_tensor)
     actual = self.evaluate(result)
   self.assertEqual(result.dtype, dtypes_lib.bool)
   self.assertAllEqual(expected, actual)
   self.assertShapeEqual(expected, result)
Exemplo n.º 9
0
def kl_divergence(distribution_a, distribution_b,
                  allow_nan_stats=True, name=None):
  """Get the KL-divergence KL(distribution_a || distribution_b).

  The KL implementation is looked up via `_registered_kl` using the types of
  the two distributions. If there is no KL method registered specifically for
  `type(distribution_a)` and `type(distribution_b)`, then the class
  hierarchies of these types are searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method
  identified in the search is used (favoring a shorter MRO distance to
  `type(distribution_a)`).

  Args:
    distribution_a: The first distribution.
    distribution_b: The second distribution.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    A Tensor with the batchwise KL-divergence between `distribution_a`
    and `distribution_b`.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types
      of `distribution_a` and `distribution_b`.
  """
  type_a = type(distribution_a)
  type_b = type(distribution_b)
  kl_fn = _registered_kl(type_a, type_b)
  if kl_fn is None:
    raise NotImplementedError(
        "No KL(distribution_a || distribution_b) registered for distribution_a "
        "type %s and distribution_b type %s"
        % (type_a.__name__, type_b.__name__))

  with ops.name_scope("KullbackLeibler"):
    kl_t = kl_fn(distribution_a, distribution_b, name=name)
    if allow_nan_stats:
      return kl_t

    # NaNs are not allowed: guard the result with a runtime assertion.
    kl_t = array_ops.identity(kl_t, name="kl")
    nan_free = math_ops.logical_not(
        math_ops.reduce_any(math_ops.is_nan(kl_t)))
    failure_message = (
        "KL calculation between %s and %s returned NaN values "
        "(and was called with allow_nan_stats=False). Values:"
        % (distribution_a.name, distribution_b.name))
    nan_check = control_flow_ops.Assert(nan_free, [failure_message, kl_t])

    # The returned identity only evaluates after the assertion has run.
    with ops.control_dependencies([nan_check]):
      return array_ops.identity(kl_t, name="checked_kl")
Exemplo n.º 10
0
  def next_inputs(self, time, outputs, state, sample_ids, name=None):
    """Computes the next inputs, replacing sampled entries with outputs.

    Calls the parent helper's `next_inputs` to get the base next inputs, then
    (unless every entry is finished or nothing was sampled) substitutes the
    entries selected by `sample_ids` with values derived from `outputs`.

    Args:
      time: Current time step; auxiliary inputs are read at `time + 1`.
      outputs: Outputs of the current step.
      state: State passed to, and returned by, the parent helper.
      sample_ids: Cast to bool below; true entries are the ones to sample.
      name: Optional name scope for the created ops.

    Returns:
      `(finished, next_inputs, state)`.
    """
    with ops.name_scope(name, "ScheduledOutputTrainingHelperNextInputs",
                        [time, outputs, state, sample_ids]):
      (finished, base_next_inputs, state) = (
          super(ScheduledOutputTrainingHelper, self).next_inputs(
              time=time,
              outputs=outputs,
              state=state,
              sample_ids=sample_ids,
              name=name))
      # Used as a boolean selection mask from here on.
      sample_ids = math_ops.cast(sample_ids, dtypes.bool)

      def maybe_sample():
        """Perform scheduled sampling."""

        def maybe_concatenate_auxiliary_inputs(outputs_, indices=None):
          """Concatenate outputs with auxiliary inputs, if they exist."""
          if self._auxiliary_input_tas is None:
            return outputs_

          # Auxiliary inputs for the step that will consume these inputs.
          next_time = time + 1
          auxiliary_inputs = nest.map_structure(
              lambda ta: ta.read(next_time), self._auxiliary_input_tas)
          if indices is not None:
            # Keep only the rows that line up with the gathered outputs.
            auxiliary_inputs = array_ops.gather_nd(auxiliary_inputs, indices)
          return nest.map_structure(
              lambda x, y: array_ops.concat((x, y), -1),
              outputs_, auxiliary_inputs)

        # Without a next-inputs function, outputs are used directly wherever
        # `sample_ids` is true.
        if self._next_inputs_fn is None:
          return array_ops.where(
              sample_ids, maybe_concatenate_auxiliary_inputs(outputs),
              base_next_inputs)

        # Indices of the sampled and non-sampled entries (complementary sets).
        where_sampling = math_ops.cast(
            array_ops.where(sample_ids), dtypes.int32)
        where_not_sampling = math_ops.cast(
            array_ops.where(math_ops.logical_not(sample_ids)), dtypes.int32)
        outputs_sampling = array_ops.gather_nd(outputs, where_sampling)
        inputs_not_sampling = array_ops.gather_nd(base_next_inputs,
                                                  where_not_sampling)
        # Only the sampled rows go through `_next_inputs_fn`.
        sampled_next_inputs = maybe_concatenate_auxiliary_inputs(
            self._next_inputs_fn(outputs_sampling), where_sampling)

        # Scatter both groups back into tensors of the base shape; the index
        # sets are complementary, so the sum merges them.
        base_shape = array_ops.shape(base_next_inputs)
        return (array_ops.scatter_nd(indices=where_sampling,
                                     updates=sampled_next_inputs,
                                     shape=base_shape)
                + array_ops.scatter_nd(indices=where_not_sampling,
                                       updates=inputs_not_sampling,
                                       shape=base_shape))

      # Skip the sampling branch when everything is finished or no entry was
      # selected for sampling.
      all_finished = math_ops.reduce_all(finished)
      no_samples = math_ops.logical_not(math_ops.reduce_any(sample_ids))
      next_inputs = control_flow_ops.cond(
          math_ops.logical_or(all_finished, no_samples),
          lambda: base_next_inputs, maybe_sample)
      return (finished, next_inputs, state)
Exemplo n.º 11
0
 def _not(self, x, use_gpu=False):
   """Checks `math_ops.logical_not(x)` against `np.logical_not(x)`.

   Runs inside a test session (forcing GPU placement only when a GPU was
   requested and is available) and asserts that the op's dtype is bool and
   that its evaluated values and shape match the NumPy reference.
   """
   expected = np.logical_not(x)
   must_use_gpu = use_gpu and test_util.is_gpu_available()
   with self.test_session(use_gpu=use_gpu, force_gpu=must_use_gpu):
     x_tensor = ops.convert_to_tensor(x)
     result = math_ops.logical_not(x_tensor)
     actual = self.evaluate(result)
   self.assertEqual(result.dtype, dtypes_lib.bool)
   self.assertAllEqual(expected, actual)
   self.assertShapeEqual(expected, result)
  def _assert_non_singular(self):
    """Returns an `Assert` op that fails if `self._diag` contains a zero.

    For complex dtypes the check is applied to the absolute value of the
    diagonal entries.
    """
    magnitudes = (
        math_ops.complex_abs(self._diag)
        if self.dtype.is_complex else self._diag)

    is_zero = math_ops.equal(magnitudes, 0)
    every_entry_nonzero = math_ops.reduce_all(math_ops.logical_not(is_zero))

    return control_flow_ops.Assert(
        every_entry_nonzero,
        data=["Singular operator: diag contained zero values.", self._diag])
Exemplo n.º 13
0
def kl(dist_a, dist_b, allow_nan=False, name=None):
  """Get the KL-divergence KL(dist_a || dist_b).

  The KL implementation is looked up via `_registered_kl` using the types of
  the two distributions. If there is no KL method registered specifically for
  `type(dist_a)` and `type(dist_b)`, then the class hierarchies of these types
  are searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method
  identified in the search is used (favoring a shorter MRO distance to
  `type(dist_a)`).

  Args:
    dist_a: The first distribution.
    dist_b: The second distribution.
    allow_nan: If `False` (default), a runtime error is raised
      if the KL returns NaN values for any batch entry of the given
      distributions.  If `True`, the KL may return a NaN for the given entry.
    name: (optional) Name scope to use for created operations.

  Returns:
    A Tensor with the batchwise KL-divergence between dist_a and dist_b.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types
      of dist_a and dist_b.
  """
  type_a = type(dist_a)
  type_b = type(dist_b)
  kl_fn = _registered_kl(type_a, type_b)
  if kl_fn is None:
    raise NotImplementedError(
        "No KL(dist_a || dist_b) registered for dist_a type %s and dist_b "
        "type %s" % ((type_a.__name__, type_b.__name__)))

  with ops.name_scope("KullbackLeibler"):
    kl_t = kl_fn(dist_a, dist_b, name=name)
    if allow_nan:
      return kl_t

    # NaNs are not allowed: guard the result with a runtime assertion.
    kl_t = array_ops.identity(kl_t, name="kl")
    nan_free = math_ops.logical_not(
        math_ops.reduce_any(math_ops.is_nan(kl_t)))
    failure_message = (
        "KL calculation between %s and %s returned NaN values "
        "(and was called with allow_nan=False).  Values:"
        % (dist_a.name, dist_b.name))
    nan_check = control_flow_ops.Assert(nan_free, [failure_message, kl_t])

    # The returned identity only evaluates after the assertion has run.
    with ops.control_dependencies([nan_check]):
      return array_ops.identity(kl_t, name="checked_kl")
Exemplo n.º 14
0
def kl(dist_a, dist_b, allow_nan=False, name=None):
    """Get the KL-divergence KL(dist_a || dist_b).

    The KL implementation is looked up in `_DIVERGENCES` by the exact pair
    `(type(dist_a), type(dist_b))`.

    Args:
      dist_a: instance of distributions.Distribution.
      dist_b: instance of distributions.Distribution.
      allow_nan: If False (default), a runtime error is raised
        if the KL returns NaN values for any batch entry of the given
        distributions.  If True, the KL may return a NaN for the given entry.
      name: (optional) Name scope to use for created operations.

    Returns:
      A Tensor with the batchwise KL-divergence between dist_a and dist_b.

    Raises:
      TypeError: If dist_a or dist_b is not an instance of Distribution.
      NotImplementedError: If no KL method is defined for distribution types
        of dist_a and dist_b.
    """
    if not isinstance(dist_a, distribution.Distribution):
        raise TypeError("dist_a is not an instance of Distribution, received type: %s" % type(dist_a))
    if not isinstance(dist_b, distribution.Distribution):
        raise TypeError("dist_b is not an instance of Distribution, received type: %s" % type(dist_b))

    type_pair = (type(dist_a), type(dist_b))
    kl_fn = _DIVERGENCES.get(type_pair, None)
    if kl_fn is None:
        raise NotImplementedError(
            "No KL(dist_a || dist_b) registered for dist_a type %s and dist_b "
            "type %s" % ((type(dist_a).__name__, type(dist_b).__name__))
        )

    with ops.name_scope("KullbackLeibler"):
        kl_t = kl_fn(dist_a, dist_b, name=name)
        if allow_nan:
            return kl_t

        # NaNs are not allowed: guard the result with a runtime assertion.
        kl_t = array_ops.identity(kl_t, name="kl")
        nan_free = math_ops.logical_not(math_ops.reduce_any(math_ops.is_nan(kl_t)))
        failure_message = (
            "KL calculation between %s and %s returned NaN values "
            "(and was called with allow_nan=False).  Values:" % (dist_a.name, dist_b.name)
        )
        nan_check = logging_ops.Assert(nan_free, [failure_message, kl_t])

        # The returned identity only evaluates after the assertion has run.
        with ops.control_dependencies([nan_check]):
            return array_ops.identity(kl_t, name="checked_kl")
Exemplo n.º 15
0
def _MaximumMinimumGrad(op, grad, selector_op):
  """Shared gradient implementation for Maximum and Minimum.

  Args:
    op: The forward op; its first two inputs are the operands `x` and `y`.
    grad: Incoming gradient with respect to the op's output.
    selector_op: Callable `selector_op(x, y)` returning a boolean mask that
      is True where the gradient should flow to `x`; the remaining positions
      send it to `y`.

  Returns:
    A pair `(gx, gy)` of gradients reshaped to the shapes of `x` and `y`.
  """
  x = op.inputs[0]
  y = op.inputs[1]
  x_shape = array_ops.shape(x)
  y_shape = array_ops.shape(y)
  zero_grad = array_ops.zeros(array_ops.shape(grad), grad.dtype)
  routes_to_x = selector_op(x, y)
  # Axes along which each operand was broadcast and must be summed out.
  x_axes, y_axes = gen_array_ops._broadcast_gradient_args(x_shape, y_shape)
  grad_for_x = array_ops.where(routes_to_x, grad, zero_grad)
  grad_for_y = array_ops.where(
      math_ops.logical_not(routes_to_x), grad, zero_grad)
  gx = array_ops.reshape(math_ops.reduce_sum(grad_for_x, x_axes), x_shape)
  gy = array_ops.reshape(math_ops.reduce_sum(grad_for_y, y_axes), y_shape)
  return (gx, gy)
Exemplo n.º 16
0
def pairwise_distance(feature, squared=False):
  """Computes the pairwise distance matrix with numerical stability.

  output[i, j] = || feature[i, :] - feature[j, :] ||_2

  Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * <a, b>.

  Args:
    feature: 2-D Tensor of size [number of data, feature dimension].
    squared: Boolean, whether or not to square the pairwise distances.

  Returns:
    pairwise_distances: 2-D Tensor of size [number of data, number of data].
  """
  # Column vector of squared row norms.
  row_sq_norms = math_ops.reduce_sum(
      math_ops.square(feature), axis=[1], keepdims=True)
  # Row vector of the same squared norms, taken from the transpose.
  col_sq_norms = math_ops.reduce_sum(
      math_ops.square(array_ops.transpose(feature)), axis=[0], keepdims=True)
  sq_norm_sums = math_ops.add(row_sq_norms, col_sq_norms)
  inner_products = math_ops.matmul(feature, array_ops.transpose(feature))
  sq_distances = sq_norm_sums - 2.0 * inner_products

  # Deal with numerical inaccuracies. Set small negatives to zero.
  sq_distances = math_ops.maximum(sq_distances, 0.0)
  # Mask of the entries whose squared distance is zero.
  zero_mask = math_ops.less_equal(sq_distances, 0.0)

  # Optionally take the sqrt; the 1e-16 offset is added only at the
  # zero-distance entries, so sqrt is never evaluated exactly at 0.
  if squared:
    distances = sq_distances
  else:
    distances = math_ops.sqrt(
        sq_distances + math_ops.to_float(zero_mask) * 1e-16)

  # Undo conditionally adding 1e-16.
  nonzero_mask = math_ops.to_float(math_ops.logical_not(zero_mask))
  distances = math_ops.multiply(distances, nonzero_mask)

  # Explicitly set diagonals to zero.
  num_data = array_ops.shape(feature)[0]
  off_diagonal = array_ops.ones_like(distances) - array_ops.diag(
      array_ops.ones([num_data]))
  return math_ops.multiply(distances, off_diagonal)
Exemplo n.º 17
0
def _make_auc_histograms(boolean_labels, scores, score_range, nbins):
  """Create histogram tensors from one batch of labels/scores.

  Args:
    boolean_labels: Boolean mask selecting the scores with a True label.
    scores: Scores to histogram.
    score_range: Value range passed to `histogram_fixed_width`.
    nbins: Number of histogram bins.

  Returns:
    A pair `(hist_true, hist_false)` of int64 histograms of the scores with
    True labels and with False labels respectively.
  """
  scope_values = [boolean_labels, scores, nbins]
  with variable_scope.variable_op_scope(
      scope_values, None, 'make_auc_histograms'):
    # Scores of the records in this batch whose label is True.
    true_scores = array_ops.boolean_mask(scores, boolean_labels)
    hist_true = histogram_ops.histogram_fixed_width(
        true_scores,
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_true')
    # Scores of the records in this batch whose label is False.
    false_scores = array_ops.boolean_mask(
        scores, math_ops.logical_not(boolean_labels))
    hist_false = histogram_ops.histogram_fixed_width(
        false_scores,
        score_range,
        nbins=nbins,
        dtype=dtypes.int64,
        name='hist_false')
    return hist_true, hist_false
Exemplo n.º 18
0
      def maybe_sample():
        """Perform scheduled sampling.

        Entries selected by `sample_ids` take their next input from
        `outputs` (passed through `self._next_input_layer` when one is set);
        all other entries keep `base_next_inputs`.
        """
        if self._next_input_layer is None:
          return array_ops.where(sample_ids, outputs, base_next_inputs)

        # Indices of the sampled and non-sampled entries (complementary sets).
        sampled_idx = math_ops.cast(
            array_ops.where(sample_ids), dtypes.int32)
        kept_idx = math_ops.cast(
            array_ops.where(math_ops.logical_not(sample_ids)), dtypes.int32)
        sampled_outputs = array_ops.gather_nd(outputs, sampled_idx)
        kept_inputs = array_ops.gather_nd(base_next_inputs, kept_idx)
        # Only the sampled rows go through the next-input layer.
        projected_outputs = self._next_input_layer(sampled_outputs)

        # Scatter both groups back into tensors of the base shape; the index
        # sets are complementary, so the sum merges them.
        target_shape = array_ops.shape(base_next_inputs)
        from_samples = array_ops.scatter_nd(indices=sampled_idx,
                                            updates=projected_outputs,
                                            shape=target_shape)
        from_base = array_ops.scatter_nd(indices=kept_idx,
                                         updates=kept_inputs,
                                         shape=target_shape)
        return from_samples + from_base
Exemplo n.º 19
0
      def maybe_sample():
        """Perform scheduled sampling.

        Entries selected by `sample_ids` take their next input from `outputs`
        (passed through `self._next_input_layer` when one is set, and
        concatenated with auxiliary inputs when those exist); all other
        entries keep `base_next_inputs`.
        """

        def maybe_concatenate_auxiliary_inputs(outputs_, indices=None):
          """Concatenate outputs with auxiliary inputs, if they exist."""
          if self._auxiliary_input_tas is None:
            return outputs_

          # Auxiliary inputs for the step that will consume these inputs.
          next_time = time + 1
          auxiliary_inputs = nest.map_structure(
              lambda ta: ta.read(next_time), self._auxiliary_input_tas)
          if indices is not None:
            # Keep only the rows that line up with the gathered outputs.
            auxiliary_inputs = array_ops.gather_nd(auxiliary_inputs, indices)
          return nest.map_structure(
              lambda x, y: array_ops.concat((x, y), -1),
              outputs_, auxiliary_inputs)

        # Without a next-input layer, outputs are used directly wherever
        # `sample_ids` is true.
        if self._next_input_layer is None:
          return array_ops.where(
              sample_ids, maybe_concatenate_auxiliary_inputs(outputs),
              base_next_inputs)

        # Indices of the sampled and non-sampled entries (complementary sets).
        where_sampling = math_ops.cast(
            array_ops.where(sample_ids), dtypes.int32)
        where_not_sampling = math_ops.cast(
            array_ops.where(math_ops.logical_not(sample_ids)), dtypes.int32)
        outputs_sampling = array_ops.gather_nd(outputs, where_sampling)
        inputs_not_sampling = array_ops.gather_nd(base_next_inputs,
                                                  where_not_sampling)
        # Only the sampled rows go through the next-input layer.
        sampled_next_inputs = maybe_concatenate_auxiliary_inputs(
            self._next_input_layer(outputs_sampling), where_sampling)

        # Scatter both groups back into tensors of the base shape; the index
        # sets are complementary, so the sum merges them.
        base_shape = array_ops.shape(base_next_inputs)
        return (array_ops.scatter_nd(indices=where_sampling,
                                     updates=sampled_next_inputs,
                                     shape=base_shape)
                + array_ops.scatter_nd(indices=where_not_sampling,
                                       updates=inputs_not_sampling,
                                       shape=base_shape))
Exemplo n.º 20
0
 def false_fn():
     """Builds a `MaskedTensorV1` from `mt` with an inverted mask.

     The values are `100` where `mt.mask` is true and `mt.values * 2`
     elsewhere.
     """
     new_values = array_ops.where_v2(mt.mask, 100, mt.values * 2)
     inverted_mask = math_ops.logical_not(mt.mask)
     return MaskedTensorV1(new_values, inverted_mask)
Exemplo n.º 21
0
  def test_sweeps(self):
    """Checks that `_SweepHook` runs the init/prep/train ops and switches sweeps.

    Each op under test is an assignment that flips a boolean "done" variable,
    so whether the hook ran it can be observed by reading that variable.
    """
    is_row_sweep_var = variables.Variable(True)
    is_sweep_done_var = variables.Variable(False)
    # Flags flipped to True by the corresponding ops below.
    init_done = variables.Variable(False)
    row_prep_done = variables.Variable(False)
    col_prep_done = variables.Variable(False)
    row_train_done = variables.Variable(False)
    col_train_done = variables.Variable(False)

    init_op = state_ops.assign(init_done, True)
    row_prep_op = state_ops.assign(row_prep_done, True)
    col_prep_op = state_ops.assign(col_prep_done, True)
    row_train_op = state_ops.assign(row_train_done, True)
    col_train_op = state_ops.assign(col_train_done, True)
    # The op passed to the session does nothing itself; the hook does the work.
    train_op = control_flow_ops.no_op()
    # Switching clears the sweep-done flag and toggles row/column sweep.
    switch_op = control_flow_ops.group(
        state_ops.assign(is_sweep_done_var, False),
        state_ops.assign(is_row_sweep_var,
                         math_ops.logical_not(is_row_sweep_var)))
    mark_sweep_done = state_ops.assign(is_sweep_done_var, True)

    with self.test_session() as sess:
      sweep_hook = wals_lib._SweepHook(
          is_row_sweep_var,
          is_sweep_done_var,
          init_op,
          [row_prep_op],
          [col_prep_op],
          row_train_op,
          col_train_op,
          switch_op)
      mon_sess = monitored_session._HookedSession(sess, [sweep_hook])
      sess.run([variables.global_variables_initializer()])

      # Row sweep.
      mon_sess.run(train_op)
      self.assertTrue(sess.run(init_done),
                      msg='init op not run by the Sweephook')
      self.assertTrue(sess.run(row_prep_done),
                      msg='row_prep_op not run by the SweepHook')
      self.assertTrue(sess.run(row_train_done),
                      msg='row_train_op not run by the SweepHook')
      self.assertTrue(
          sess.run(is_row_sweep_var),
          msg='Row sweep is not complete but is_row_sweep_var is False.')
      # Col sweep.
      # Marking the sweep done is expected to make the next run switch sweeps.
      mon_sess.run(mark_sweep_done)
      mon_sess.run(train_op)
      self.assertTrue(sess.run(col_prep_done),
                      msg='col_prep_op not run by the SweepHook')
      self.assertTrue(sess.run(col_train_done),
                      msg='col_train_op not run by the SweepHook')
      self.assertFalse(
          sess.run(is_row_sweep_var),
          msg='Col sweep is not complete but is_row_sweep_var is True.')
      # Row sweep.
      mon_sess.run(mark_sweep_done)
      mon_sess.run(train_op)
      self.assertTrue(
          sess.run(is_row_sweep_var),
          msg='Col sweep is complete but is_row_sweep_var is False.')
Exemplo n.º 22
0
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)`. `output` is a `BeamSearchDecoderOutput`
    holding the selected scores, the predicted word ids and the parent beam
    ids. `next_state` is the new `BeamSearchDecoderState` (gathered cell
    state, log probs, lengths and finished flags).
  """
  # Static value of the batch size (if available); only used below to attach
  # static shape information to the top_k outputs.
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  # Prefer the static vocab size; fall back to the dynamic shape otherwise.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  # One-hot over the vocabulary that is 0 at `end_token` and 1 everywhere else.
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.fill([batch_size, beam_width], end_token),
      depth=vocab_size,
      on_value=np.int64(0), off_value=np.int64(1),
      dtype=dtypes.int64)
  # Zero out the increment for beams that were already finished.
  add_mask = math_ops.to_int64(math_ops.logical_not(previously_finished))
  lengths_to_add *= array_ops.expand_dims(add_mask, 2)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  scores_shape = array_ops.shape(scores)
  scores_flat = control_flow_ops.cond(
      time > 0,
      lambda: array_ops.reshape(scores, [batch_size, -1]),
      lambda: scores[:, 0])
  # Number of candidates per batch entry in `scores_flat`: the product of the
  # trailing dimensions of `scores` that were kept by the branch above.
  num_available_beam = control_flow_ops.cond(
      time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
      lambda: math_ops.reduce_prod(scores_shape[2:]))

  # Pick the next beams according to the specified successors function
  # (never request more entries from top_k than there are candidates).
  next_beam_size = math_ops.minimum(
      ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width"),
      num_available_beam)
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)

  # `k` is a tensor, so re-attach the static shape we expect.
  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(word_indices, vocab_size,
                                   name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  # The quotient of the flat index by the vocab size is the parent beam id.
  next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                    name="next_beam_parent_ids")

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(previously_finished,
                                      math_ops.equal(next_word_ids, end_token),
                                      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged.
  # 2. Beams that are now finished (EOS predicted) have their length
  #    increased by 1.
  # 3. Beams that are not yet finished have their length increased by 1.
  lengths_to_add = math_ops.to_int64(math_ops.logical_not(previously_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
Exemplo n.º 23
0
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
    """Converts a `Tensor` into a `RaggedTensor`.

  The set of absent/default values may be specified using a vector of lengths
  or a padding value (but not both).  If `lengths` is specified, then the
  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
  If `padding` is specified, then any row *suffix* consisting entirely of
  `padding` will be excluded from the returned `RaggedTensor`.  If neither
  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
  have no absent/default values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows in
      `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.
  Raises:
    ValueError: If both `lengths` and `padding` are specified, or if
      `ragged_rank` is not greater than zero.
    TypeError: If `ragged_rank` is not an `int`.
  """
    if lengths is not None and padding is not None:
        raise ValueError('Specify lengths or padding, but not both')
    if not isinstance(ragged_rank, int):
        raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
    if ragged_rank <= 0:
        raise ValueError('ragged_rank must be greater than 0; got %s' %
                         ragged_rank)

    with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
        tensor = ops.convert_to_tensor(tensor, name='tensor')
        # The returned shape is discarded; this call is used only as a static
        # rank check (presumably raising when the rank is known to be too
        # small -- confirm against TensorShape.with_rank_at_least).
        tensor.shape.with_rank_at_least(ragged_rank + 1)
        input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
        ncols = input_shape[1]

        # Handle ragged_rank>1 via recursion:
        # If the output should have multiple ragged dimensions, then first
        # flatten the tensor to eliminate all but the last ragged dimension,
        # and recursively convert that flattened tensor.  Then add on the splits
        # for the dimensions that we flattened out.
        if ragged_rank > 1:
            # Flatten `tensor` to eliminate all but the last ragged dimension.
            new_shape = array_ops.concat([
                constant_op.constant([-1], dtypes.int64),
                input_shape[ragged_rank:]
            ],
                                         axis=0)
            flattened = array_ops.reshape(tensor, new_shape)
            # Recursively convert the flattened tensor.  The recursive call
            # uses the default `ragged_rank=1`, and `lengths`/`padding` are
            # forwarded unchanged.
            values = from_tensor(flattened, lengths, padding)
            # The total number of elements in each dimension.  E.g., if
            # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
            dim_size = math_ops.cumprod(input_shape)
            # Construct splits tensors for the dimensions that were flattened.
            new_splits = [
                math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim]
                for dim in range(1, ragged_rank)
            ]
            return ragged_factory_ops.from_nested_row_splits(
                values, new_splits)

        # If padding was specified, then use it to find row lengths.
        if padding is not None:
            padding = ops.convert_to_tensor(padding,
                                            name='padding',
                                            dtype=tensor.dtype)
            padding.shape.assert_is_compatible_with(tensor.shape[2:])

            # Find places where the padding is equal to the tensor.  (This will
            # broadcast `padding` across the outermost 2 dimensions of `tensor`,
            # so `has_default_value.shape = tensor.shape`.)
            has_default_value = math_ops.equal(padding, tensor)

            # If the padding isn't a scalar, then require that all values in the
            # padding match each item in the tensor.  After this block of code,
            # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
            # use reduce_all for both cases, because when you pass an empty `axis`
            # list to reduce_all, it reduces all axes; but we want it to reduce no
            # axes -- i.e., to be a no-op.)
            tensor_rank = array_ops.rank(tensor)
            reduce_axis = math_ops.range(2, tensor_rank)
            has_default = control_flow_ops.cond(
                tensor_rank > 2, lambda: math_ops.reduce_all(has_default_value,
                                                             axis=reduce_axis),
                lambda: has_default_value)
            # Re-attach static shape information to the `cond` output: first
            # the rank (2), then whatever is statically known about the two
            # leading dimensions of `tensor`.
            has_default.set_shape(tensor_shape.TensorShape([None, None]))
            has_default.set_shape(tensor.shape[:2])

            # Use has_default to find the length of each row: for each non-default
            # item in a row, calculate the length that the row needs to have to
            # include that item; and then take the max of those values (across each
            # row).
            has_nondefault = math_ops.logical_not(has_default)
            has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
            length_for_nondefault_value = (
                has_nondefault *
                array_ops.expand_dims(math_ops.range(1, ncols + 1), 0))
            lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1)

        # If we have lengths (either directly supplied, or computed from paddings),
        # then use those to construct splits; and then use masking to get the
        # corresponding values.
        if lengths is not None:
            lengths = ragged_util.convert_to_int_tensor(
                lengths, 'lengths', dtypes.int64)
            lengths.shape.assert_has_rank(1)
            # Clamp every length into the range [0, ncols].
            lengths = math_ops.minimum(lengths, ncols)
            lengths = math_ops.maximum(lengths, 0)
            limits = math_ops.cumsum(lengths)
            splits = array_ops.concat(
                [array_ops.zeros([1], dtypes.int64), limits], axis=0)
            mask = array_ops.sequence_mask(lengths, maxlen=ncols)
            values = array_ops.boolean_mask(tensor, mask)
            return ragged_factory_ops.from_row_splits(values, splits)

        # If neither padding nor lengths were specified, then create a splits
        # vector that contains no default values, and reshape the input tensor
        # to form the values for the RaggedTensor.
        nrows = input_shape[0]
        nvals = nrows * ncols
        splits = math_ops.range(nrows + 1) * ncols
        values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
        values = array_ops.reshape(tensor, values_shape)
        return ragged_factory_ops.from_row_splits(values, splits)
Exemplo n.º 24
0
def lifted_struct_loss(labels, embeddings, margin=1.0):
  """Lifted structured loss over a mini-batch of embeddings.

  Pushes the distances between same-label embedding pairs below the distances
  to differently-labelled embeddings of the mini-batch, in a form that is
  differentiable with respect to the embedding vectors.
  See: https://arxiv.org/abs/1511.06452.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    lifted_loss: tf.float32 scalar.
  """
  # Turn the [batch_size] labels into a [batch_size, 1] column.
  label_shape = array_ops.shape(labels)
  assert label_shape.shape == 1
  num_labels = label_shape[0]
  label_column = array_ops.reshape(labels, [num_labels, 1])

  # Pairwise squared distance matrix of the embeddings.
  distances = pairwise_distance(embeddings)

  # same_label[i, j] tells whether items i and j share a label; its negation
  # selects the negative pairs.
  label_row = array_ops.transpose(label_column)
  same_label = math_ops.equal(label_column, label_row)
  different_label = math_ops.logical_not(same_label)

  batch_size = array_ops.size(label_column)

  margin_gap = margin - distances
  negative_mask = math_ops.cast(different_label, dtype=dtypes.float32)

  # Safe maximum restricted to negatives: shift each row so its minimum is
  # zero, mask, take the max, then undo the shift.
  row_minimums = math_ops.reduce_min(margin_gap, 1, keepdims=True)
  shifted_gap = margin_gap - row_minimums
  shifted_negatives = math_ops.multiply(shifted_gap, negative_mask)
  shifted_row_max = math_ops.reduce_max(shifted_negatives, 1, keepdims=True)
  row_negative_maximums = shifted_row_max + row_minimums

  # Compute the loss.
  # pair_max[i, j] = max(m_i, m_j), where m_i is the max of
  #   alpha - negative D_i's.
  # This matches the Caffe loss layer implementation at:
  #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long
  column_negative_maximums = array_ops.transpose(row_negative_maximums)
  pair_max = math_ops.maximum(row_negative_maximums, column_negative_maximums)

  tile_multiples = [batch_size, 1]
  gap_tiled = array_ops.tile(margin_gap, tile_multiples)
  negative_mask_tiled = array_ops.tile(negative_mask, [batch_size, 1])
  pair_max_column = array_ops.reshape(array_ops.transpose(pair_max), [-1, 1])

  stabilized_exp = math_ops.exp(gap_tiled - pair_max_column)
  masked_exp = math_ops.multiply(stabilized_exp, negative_mask_tiled)
  exp_row_sums = math_ops.reduce_sum(masked_exp, 1, keepdims=True)
  loss_exp_left = array_ops.reshape(exp_row_sums, [batch_size, batch_size])

  loss_exp_right = array_ops.transpose(loss_exp_left)
  log_term = math_ops.log(loss_exp_left + loss_exp_right)
  loss_mat = pair_max + log_term
  # Add the positive distance.
  loss_mat = loss_mat + distances

  # Positive pairs are the same-label pairs minus the diagonal (self pairs).
  same_label_float = math_ops.cast(same_label, dtype=dtypes.float32)
  identity = array_ops.diag(array_ops.ones([batch_size]))
  positive_mask = same_label_float - identity

  # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
  num_positives = math_ops.reduce_sum(positive_mask) / 2.0

  positive_losses = math_ops.multiply(loss_mat, positive_mask)
  hinged = math_ops.maximum(positive_losses, 0.0)
  squared_sum = math_ops.reduce_sum(math_ops.square(hinged))
  return math_ops.truediv(
      0.25 * squared_sum, num_positives, name='liftedstruct_loss')
Exemplo n.º 25
0
def logical_not(x):
    """Returns the element-wise logical NOT of `x`, first coerced to a boolean array."""
    as_bool = np_array_ops.array(x, dtype=np.bool_)
    negated = math_ops.logical_not(as_bool)
    return negated
Exemplo n.º 26
0
def logical_not(x):
    """Returns the element-wise logical NOT of `x` wrapped as an ndarray.

    `x` is first coerced to a boolean array; the negation is applied to its
    `.data` and the result is converted with `np_utils.tensor_to_ndarray`.
    """
    as_bool = np_array_ops.array(x, dtype=np.bool_)
    negated = math_ops.logical_not(as_bool.data)
    return np_utils.tensor_to_ndarray(negated)
Exemplo n.º 27
0
def _wals_factorization_model_function(features, labels, mode, params):
    """Model function for the WALSFactorization estimator.

  Builds a `factorization_ops.WALSModel` from `params` and returns the
  `ModelFnOps` for the requested `mode`:

  * TRAIN: creates the sweep book-keeping variables and per-axis training ops,
    and hands them to a `_SweepHook`; the returned `train_op` itself is a
    no-op, the work being driven through the training hooks.
  * INFER: returns the row or column projection, selected by the
    `PROJECT_ROW` feature.
  * EVAL: returns the row or column loss (unregularized loss plus
    regularization), selected by the `PROJECT_ROW` feature.

  Args:
    features: Dictionary of features. See WALSMatrixFactorization.
    labels: Must be None.
    mode: A model_fn.ModeKeys object.
    params: Dictionary of parameters containing arguments passed to the
      WALSMatrixFactorization constructor.

  Returns:
    A ModelFnOps object.

  Raises:
    ValueError: If `mode` is not recognized.
    AssertionError: If `labels` is not None.
  """
    assert labels is None
    # The caches are only ever enabled while training.
    use_factors_weights_cache = (
        params["use_factors_weights_cache_for_training"]
        and mode == model_fn.ModeKeys.TRAIN)
    use_gramian_cache = (params["use_gramian_cache_for_training"]
                         and mode == model_fn.ModeKeys.TRAIN)
    max_sweeps = params["max_sweeps"]
    model = factorization_ops.WALSModel(
        params["num_rows"],
        params["num_cols"],
        params["embedding_dimension"],
        unobserved_weight=params["unobserved_weight"],
        regularization=params["regularization_coeff"],
        row_init=params["row_init"],
        col_init=params["col_init"],
        num_row_shards=params["num_row_shards"],
        num_col_shards=params["num_col_shards"],
        row_weights=params["row_weights"],
        col_weights=params["col_weights"],
        use_factors_weights_cache=use_factors_weights_cache,
        use_gramian_cache=use_gramian_cache)

    # Get input rows and cols. We either update rows or columns depending on
    # the value of row_sweep, which is maintained using a session hook.
    input_rows = features[WALSMatrixFactorization.INPUT_ROWS]
    input_cols = features[WALSMatrixFactorization.INPUT_COLS]

    # TRAIN mode:
    if mode == model_fn.ModeKeys.TRAIN:
        # Training consists of the following ops (controlled using a SweepHook).
        # Before a row sweep:
        #   row_update_prep_gramian_op
        #   initialize_row_update_op
        # During a row sweep:
        #   update_row_factors_op
        # Before a col sweep:
        #   col_update_prep_gramian_op
        #   initialize_col_update_op
        # During a col sweep:
        #   update_col_factors_op

        # Book-keeping state, stored as non-trainable global variables.
        is_row_sweep_var = variable_scope.variable(
            True,
            trainable=False,
            name="is_row_sweep",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        is_sweep_done_var = variable_scope.variable(
            False,
            trainable=False,
            name="is_sweep_done",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        completed_sweeps_var = variable_scope.variable(
            0,
            trainable=False,
            name=WALSMatrixFactorization.COMPLETED_SWEEPS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        loss_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.LOSS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        # The root weighted squared error =
        #   \sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij )
        rwse_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.RWSE,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])

        summary.scalar("loss", loss_var)
        summary.scalar("root_weighted_squared_error", rwse_var)
        summary.scalar("completed_sweeps", completed_sweeps_var)

        def create_axis_ops(sp_input, num_items, update_fn, axis_name):
            """Creates book-keeping and training ops for a given axis.

      Args:
        sp_input: A SparseTensor corresponding to the row or column batch.
        num_items: An integer, the total number of items of this axis.
        update_fn: A function that takes one argument (`sp_input`), and that
        returns a tuple of
          * new_factors: A float Tensor of the factor values after update.
          * update_op: a TensorFlow op which updates the factors.
          * loss: A float Tensor, the unregularized loss.
          * reg_loss: A float Tensor, the regularization loss.
          * sum_weights: A float Tensor, the sum of factor weights.
        axis_name: A string that specifies the name of the axis.

      Returns:
        A tuple consisting of:
          * reset_processed_items_op: A TensorFlow op, to be run before the
            beginning of any sweep. It marks all items as not-processed.
          * axis_train_op: A Tensorflow op, to be run during this axis' sweeps.
      """
            # One boolean flag per item of this axis, initially all False.
            processed_items_init = array_ops.fill(dims=[num_items],
                                                  value=False)
            with ops.colocate_with(processed_items_init):
                processed_items = variable_scope.variable(
                    processed_items_init,
                    collections=[ops.GraphKeys.GLOBAL_VARIABLES],
                    trainable=False,
                    name="processed_" + axis_name)
            _, update_op, loss, reg, sum_weights = update_fn(sp_input)
            # First column of the sparse indices; used below as the item ids
            # to mark as processed.
            input_indices = sp_input.indices[:, 0]
            # The factor update and the loss/RWSE assignments must run before
            # the items are marked as processed.
            with ops.control_dependencies([
                    update_op,
                    state_ops.assign(loss_var, loss + reg),
                    state_ops.assign(rwse_var,
                                     math_ops.sqrt(loss / sum_weights))
            ]):
                with ops.colocate_with(processed_items):
                    update_processed_items = state_ops.scatter_update(
                        processed_items,
                        input_indices,
                        array_ops.ones_like(input_indices, dtype=dtypes.bool),
                        name="update_processed_{}_indices".format(axis_name))
                # The sweep is done once every item has been marked; when it
                # is, completed_sweeps is incremented by one (the boolean is
                # cast to int32).
                with ops.control_dependencies([update_processed_items]):
                    is_sweep_done = math_ops.reduce_all(processed_items)
                    axis_train_op = control_flow_ops.group(
                        state_ops.assign(is_sweep_done_var, is_sweep_done),
                        state_ops.assign_add(
                            completed_sweeps_var,
                            math_ops.cast(is_sweep_done, dtypes.int32)),
                        name="{}_sweep_train_op".format(axis_name))
            # Re-running the variable initializer resets all flags to False.
            return processed_items.initializer, axis_train_op

        reset_processed_rows_op, row_train_op = create_axis_ops(
            input_rows, params["num_rows"],
            lambda x: model.update_row_factors(sp_input=x,
                                               transpose_input=False), "rows")
        reset_processed_cols_op, col_train_op = create_axis_ops(
            input_cols, params["num_cols"],
            lambda x: model.update_col_factors(sp_input=x,
                                               transpose_input=True), "cols")
        # Switching sweeps flips is_row_sweep and resets the processed flags
        # of both axes.
        switch_op = control_flow_ops.group(state_ops.assign(
            is_row_sweep_var, math_ops.logical_not(is_row_sweep_var)),
                                           reset_processed_rows_op,
                                           reset_processed_cols_op,
                                           name="sweep_switch_op")
        row_prep_ops = [
            model.row_update_prep_gramian_op, model.initialize_row_update_op
        ]
        col_prep_ops = [
            model.col_update_prep_gramian_op, model.initialize_col_update_op
        ]
        init_op = model.worker_init
        sweep_hook = _SweepHook(is_row_sweep_var, is_sweep_done_var, init_op,
                                row_prep_ops, col_prep_ops, row_train_op,
                                col_train_op, switch_op)
        global_step_hook = _IncrementGlobalStepHook()
        training_hooks = [sweep_hook, global_step_hook]
        # Only add a stopping hook when a sweep limit was configured.
        if max_sweeps is not None:
            training_hooks.append(_StopAtSweepHook(max_sweeps))

        # The train_op is a no-op: the training ops are passed to the hooks.
        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.TRAIN,
                                   predictions={},
                                   loss=loss_var,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=training_hooks)

    # INFER mode
    elif mode == model_fn.ModeKeys.INFER:
        # Optional feature: `.get` yields None when it is absent.
        projection_weights = features.get(
            WALSMatrixFactorization.PROJECTION_WEIGHTS)

        def get_row_projection():
            return model.project_row_factors(
                sp_input=input_rows,
                projection_weights=projection_weights,
                transpose_input=False)

        def get_col_projection():
            return model.project_col_factors(
                sp_input=input_cols,
                projection_weights=projection_weights,
                transpose_input=True)

        # The PROJECT_ROW feature selects between row and column projection.
        predictions = {
            WALSMatrixFactorization.PROJECTION_RESULT:
            control_flow_ops.cond(
                features[WALSMatrixFactorization.PROJECT_ROW],
                get_row_projection, get_col_projection)
        }

        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.INFER,
                                   predictions=predictions,
                                   loss=None,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    # EVAL mode
    elif mode == model_fn.ModeKeys.EVAL:

        # Only the loss and regularization outputs of the update functions are
        # used here; the returned update op is discarded.
        def get_row_loss():
            _, _, loss, reg, _ = model.update_row_factors(
                sp_input=input_rows, transpose_input=False)
            return loss + reg

        def get_col_loss():
            _, _, loss, reg, _ = model.update_col_factors(sp_input=input_cols,
                                                          transpose_input=True)
            return loss + reg

        # The PROJECT_ROW feature selects between the row and column loss.
        loss = control_flow_ops.cond(
            features[WALSMatrixFactorization.PROJECT_ROW], get_row_loss,
            get_col_loss)
        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.EVAL,
                                   predictions={},
                                   loss=loss,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    else:
        raise ValueError("mode=%s is not recognized." % str(mode))
Exemplo n.º 28
0
 def __ne__(self, other):
     """Negates `__eq__`: element-wise for a Tensor result, plain `not` otherwise."""
     equality = self.__eq__(other)
     if not isinstance(equality, ops.Tensor):
         return not equality
     return math_ops.logical_not(equality)
Exemplo n.º 29
0
 def f(x):
     """Inverts `x`: logical NOT for boolean dtype, bitwise invert otherwise."""
     is_boolean = x.dtype == dtypes.bool
     invert = math_ops.logical_not if is_boolean else bitwise_ops.invert
     return invert(x)
def _logical_not(x):
    """Applies `logical_not` to `x`, statically when `_static_value` yields a value.

    When `_static_value(x)` is not None the negation is computed with NumPy and
    wrapped in a constant; otherwise a `logical_not` op on `x` is returned.
    """
    static_x = _static_value(x)
    if static_x is not None:
        return constant_op.constant(np.logical_not(static_x))
    return math_ops.logical_not(x)
Exemplo n.º 31
0
def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
    """Assert the condition `x == y` holds element-wise.

  Example of adding a dependency to an operation:

  ```python
  with tf.control_dependencies([tf.assert_equal(x, y)]):
    output = tf.reduce_sum(x)
  ```

  This condition holds if for every pair of (possibly broadcast) elements
  `x[i]`, `y[i]`, we have `x[i] == y[i]`.
  If both `x` and `y` are empty, this is trivially satisfied.

  Args:
    x:  Numeric `Tensor`.
    y:  Numeric `Tensor`, same dtype as and broadcastable to `x`.
    data:  The tensors to print out if the condition is False.  Defaults to
      error message and first few entries of `x`, `y`.
    summarize: Print this many entries of each tensor.  When executing
      eagerly, `None` is treated as 3.
    message: A string to prefix to the default message.
    name: A name for this operation (optional).  Defaults to "assert_equal".

  Returns:
    Op that raises `InvalidArgumentError` if `x == y` is False.
    @compatibility{eager} returns None

  Raises:
    InvalidArgumentError: if the check can be performed immediately and
      `x == y` is False. The check can be performed immediately during eager
      execution or if `x` and `y` are statically known.
  """
    # Normalize a missing message to the empty string once, up front.
    message = message or ''
    with ops.name_scope(name, 'assert_equal', [x, y, data]):
        x = ops.convert_to_tensor(x, name='x')
        y = ops.convert_to_tensor(y, name='y')

        # Eager execution: evaluate the condition right away and raise with a
        # descriptive message instead of creating an Assert op.
        if context.executing_eagerly():
            eq = math_ops.equal(x, y)
            condition = math_ops.reduce_all(eq)
            if not condition:
                # Prepare a message with first elements of x and y.
                summary_msg = ''
                # Default to printing 3 elements like control_flow_ops.Assert (used
                # by graph mode) does.
                summarize = 3 if summarize is None else summarize
                if summarize:
                    # reshape((-1,)) is the fastest way to get a flat array view.
                    x_np = x.numpy().reshape((-1, ))
                    y_np = y.numpy().reshape((-1, ))
                    # Never report more elements than each tensor holds.
                    x_sum = min(x_np.size, summarize)
                    y_sum = min(y_np.size, summarize)
                    summary_msg = ('First %d elements of x:\n%s\n'
                                   'First %d elements of y:\n%s\n' %
                                   (x_sum, x_np[:x_sum], y_sum, y_np[:y_sum]))

                index_and_values_str = ''
                if x.shape == y.shape and x.shape.as_list():
                    # If the shapes of x and y are the same (and not scalars),
                    # Get the values that actually differed and their indices.
                    # If shapes are different this information is more confusing
                    # than useful.
                    mask = math_ops.logical_not(eq)
                    indices = array_ops.where(mask)
                    indices_np = indices.numpy()
                    x_vals = array_ops.boolean_mask(x, mask)
                    y_vals = array_ops.boolean_mask(y, mask)
                    # Cap the count at the number of differing positions.
                    summarize = min(summarize, indices_np.shape[0])
                    index_and_values_str = (
                        'Indices of first %s different values:\n%s\n'
                        'Corresponding x values:\n%s\n'
                        'Corresponding y values:\n%s\n' %
                        (summarize, indices_np[:summarize],
                         x_vals.numpy().reshape(
                             (-1, ))[:summarize], y_vals.numpy().reshape(
                                 (-1, ))[:summarize]))

                # `message` was already normalized above, so the `or ''` here
                # is redundant but harmless.
                raise errors.InvalidArgumentError(
                    node_def=None,
                    op=None,
                    message=(
                        '%s\nCondition x == y did not hold.\n%s%s' %
                        (message or '', index_and_values_str, summary_msg)))
            # Eager mode returns None when the condition holds.
            return

        # Graph mode: build the default failure data if none was supplied.
        if data is None:
            data = [
                message, 'Condition x == y did not hold element-wise:',
                'x (%s) = ' % x.name, x,
                'y (%s) = ' % y.name, y
            ]
        condition = math_ops.reduce_all(math_ops.equal(x, y))
        # When both values are statically known, also check them at graph
        # construction time.
        x_static = tensor_util.constant_value(x)
        y_static = tensor_util.constant_value(y)
        if x_static is not None and y_static is not None:
            condition_static = (x_static == y_static).all()
            _assert_static(condition_static, data)
        return control_flow_ops.Assert(condition, data, summarize=summarize)
Exemplo n.º 32
0
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None):
    """Returns op to update the given confusion matrix variables.

    For every pair of values in y_true and y_pred:

    true_positive: y_true == True and y_pred > thresholds
    false_negatives: y_true == True and y_pred <= thresholds
    true_negatives: y_true == False and y_pred <= thresholds
    false_positive: y_true == False and y_pred > thresholds

    The results will be weighted and added together. When multiple thresholds
    are provided, we will repeat the same for every threshold.

    For estimation of these metrics over a stream of data, the function
    creates an `update_op` operation that updates the given variables.

    If `sample_weight` is `None`, weights default to 1.
    Use weights of 0 to mask values.

    Args:
      variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid
        keys and corresponding variables to update as values. The dtype of
        the first variable in the dictionary is used for all casts.
      y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
      y_pred: A floating point `Tensor` of arbitrary shape and whose values
        are in the range `[0, 1]`.
      thresholds: A float value, float tensor, python list, or tuple of float
        thresholds in `[0, 1]`, or NEG_INF (used when top_k is set).
      top_k: Optional int, indicates that the positive labels should be
        limited to the top k predictions.
      class_id: Optional int, limits the prediction and labels to the class
        specified by this argument.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same
        rank as `y_true`, and must be broadcastable to `y_true` (i.e., all
        dimensions must be either `1`, or the same as the corresponding
        `y_true` dimension).
      multi_label: Optional boolean indicating whether multidimensional
        prediction/labels should be treated as multilabel responses, or
        flattened into a single label. When True, the values of
        `variables_to_update` must have a second dimension equal to the
        number of labels in y_true and y_pred, and those tensors must not be
        RaggedTensors.
      label_weights: (optional) tensor of non-negative weights for multilabel
        data. The weights are applied when calculating TP, FP, FN, and TN
        without explicit multilabel handling (i.e. when the data is to be
        flattened).

    Returns:
      Update op grouping one `assign_add` per requested variable, or `None`
      when `variables_to_update` is `None`.

    Raises:
      ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
        `sample_weight` is not `None` and its shape doesn't match `y_pred`,
        or if `variables_to_update` contains invalid keys, or if
        `label_weights` is given together with `multi_label=True`.
    """
    # `label_weights` is only applied on the flattened (non-multilabel) path.
    if multi_label and label_weights is not None:
        raise ValueError(
            '`label_weights` for multilabel data should be handled '
            'outside of `update_confusion_matrix_variables` when '
            '`multi_label` is True.')
    # Nothing to update: return None instead of building any ops.
    if variables_to_update is None:
        return
    # At least one key has to be a recognised ConfusionMatrix member.
    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(list(ConfusionMatrix),
                                    variables_to_update.keys()))

    # Labels, predictions, thresholds and weights are all cast to the dtype of
    # the first variable in the dictionary.
    variable_dtype = list(variables_to_update.values())[0].dtype

    y_true = math_ops.cast(y_true, dtype=variable_dtype)
    y_pred = math_ops.cast(y_pred, dtype=variable_dtype)
    thresholds = ops.convert_to_tensor_v2(thresholds, dtype=variable_dtype)
    num_thresholds = thresholds.shape[0]
    if multi_label:
        # Graph-time boolean: true when `thresholds` has rank 1.
        one_thresh = math_ops.equal(math_ops.cast(1, dtype=dtypes.int32),
                                    array_ops.rank(thresholds),
                                    name='one_set_of_thresholds_cond')
    else:
        # NOTE(review): helper defined elsewhere; presumably checks that the
        # (possibly ragged) inputs are compatible and returns their flat
        # values -- confirm against its definition.
        [y_pred, y_true
         ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                             sample_weight)
        one_thresh = math_ops.cast(True, dtype=dtypes.bool)

    # Unlike the check above, this rejects the call if ANY key is unknown.
    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    # The [0, 1] range assertions on `y_pred` are attached as control
    # dependencies of the ops created inside this scope.
    with ops.control_dependencies([
            check_ops.assert_greater_equal(y_pred,
                                           math_ops.cast(0.0,
                                                         dtype=y_pred.dtype),
                                           message='predictions must be >= 0'),
            check_ops.assert_less_equal(y_pred,
                                        math_ops.cast(1.0, dtype=y_pred.dtype),
                                        message='predictions must be <= 1')
    ]):
        if sample_weight is None:
            y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions(
                y_pred, y_true)
        else:
            y_pred, y_true, sample_weight = (
                tf_losses_utils.squeeze_or_expand_dimensions(
                    y_pred, y_true, sample_weight=sample_weight))
    # Static shape compatibility check between predictions and labels.
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        # Keep only the entries at index `class_id` of the last axis.
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    # The leading dimension counts predictions; the product of the remaining
    # dimensions counts labels (1 when `y_pred` is statically rank 1).
    pred_shape = array_ops.shape(y_pred)
    num_predictions = pred_shape[0]
    if y_pred.shape.ndims == 1:
        num_labels = 1
    else:
        num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
    # Thresholds are tiled across labels only when `one_thresh` holds.
    thresh_label_tile = control_flow_ops.cond(
        one_thresh, lambda: num_labels,
        lambda: math_ops.cast(1, dtype=dtypes.int32))

    # Reshape predictions and labels, adding a dim for thresholding.
    if multi_label:
        predictions_extra_dim = array_ops.expand_dims(y_pred, 0)
        labels_extra_dim = array_ops.expand_dims(
            math_ops.cast(y_true, dtype=dtypes.bool), 0)
    else:
        # Flatten predictions and labels when not multilabel.
        predictions_extra_dim = array_ops.reshape(y_pred, [1, -1])
        labels_extra_dim = array_ops.reshape(
            math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

    # Tile the thresholds for every prediction.
    if multi_label:
        thresh_pretile_shape = [num_thresholds, 1, -1]
        thresh_tiles = [1, num_predictions, thresh_label_tile]
        data_tiles = [num_thresholds, 1, 1]
    else:
        thresh_pretile_shape = [num_thresholds, -1]
        thresh_tiles = [1, num_predictions * num_labels]
        data_tiles = [num_thresholds, 1]

    thresh_tiled = array_ops.tile(
        array_ops.reshape(thresholds, thresh_pretile_shape),
        array_ops.stack(thresh_tiles))

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles)

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_extra_dim, data_tiles)

    # Broadcast sample weights to `y_pred`, then lay them out like the tiled
    # predictions (`thresh_tiles` doubles as the reshape target here).
    if sample_weight is not None:
        sample_weight = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=variable_dtype), y_pred)
        weights_tiled = array_ops.tile(
            array_ops.reshape(sample_weight, thresh_tiles), data_tiles)
    else:
        weights_tiled = None

    # Label weights (flattened path only) are multiplied into the sample
    # weights, or used alone when there are no sample weights.
    if label_weights is not None and not multi_label:
        label_weights = array_ops.expand_dims(label_weights, 0)
        label_weights = weights_broadcast_ops.broadcast_weights(
            label_weights, y_pred)
        label_weights_tiled = array_ops.tile(
            array_ops.reshape(label_weights, thresh_tiles), data_tiles)
        if weights_tiled is None:
            weights_tiled = label_weights_tiled
        else:
            weights_tiled = math_ops.multiply(weights_tiled,
                                              label_weights_tiled)

    update_ops = []

    # Adds to `var` the (optionally weighted) count, summed over axis 1, of
    # positions where both `label` and `pred` are true.
    def weighted_assign_add(label, pred, weights, var):
        label_and_pred = math_ops.cast(math_ops.logical_and(label, pred),
                                       dtype=var.dtype)
        if weights is not None:
            label_and_pred *= math_ops.cast(weights, dtype=var.dtype)
        return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

    # Maps each confusion-matrix entry to its (label mask, prediction mask).
    # Negated masks are built only when some requested entry needs them.
    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    # Entries present in `loop_vars` but not requested by the caller are
    # skipped here, so no update op is created for them.
    for matrix_cond, (label, pred) in loop_vars.items():

        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))

    return control_flow_ops.group(update_ops)
Exemplo n.º 33
0
        def body(time, outputs_ta, state, inputs, finished, sequence_lengths,
                 input_ta, dec_finished_ta, sample_id_ta):
            """Internal while_loop body: run one decoder step and record it.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of
                finish).
              input_ta: accumulator that receives `inputs` at index `time`.
              dec_finished_ta: accumulator that receives the finished flags
                returned by `decoder.step` at index `time`.
              sample_id_ta: accumulator that receives the step output's
                `sample_id` at index `time`.

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths, input_ta, dec_finished_ta,
                sample_id_ta)`.
            """
            step_outputs, step_state, next_inputs, step_finished = (
                decoder.step(time, inputs, state))

            # Sample ids are kept so an autoregressive mask can be derived
            # from them later; inputs and raw finished flags are logged too.
            sample_id_ta = sample_id_ta.write(time, step_outputs.sample_id)
            input_ta = input_ta.write(time, inputs)
            dec_finished_ta = dec_finished_ta.write(time, step_finished)

            # A decoder that tracks its own finished state is trusted as-is;
            # otherwise a finished entry stays finished.
            next_finished = (
                step_finished if decoder.tracks_own_finished else
                math_ops.logical_or(step_finished, finished))

            # Entries that were still running on entry get length time + 1;
            # the rest keep their recorded length.
            still_running = math_ops.logical_not(finished)
            running_lengths = array_ops.fill(
                array_ops.shape(sequence_lengths), time + 1)
            next_sequence_lengths = array_ops.where(
                still_running, running_lengths, sequence_lengths)

            nest.assert_same_structure(state, step_state)
            nest.assert_same_structure(outputs_ta, step_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            def _zero_if_finished(out, zero):
                # Zero out output values past finish.
                return array_ops.where(finished, zero, out)

            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states get passed through; other
                # states keep their previous value once finished.
                if isinstance(cur, tensor_array_ops.TensorArray):
                    return new
                new.set_shape(cur.shape)
                if new.shape.ndims == 0:
                    return new
                return array_ops.where(finished, cur, new)

            if impute_finished:
                emit = nest.map_structure(_zero_if_finished, step_outputs,
                                          zero_outputs)
                next_state = nest.map_structure(_maybe_copy_state, step_state,
                                                state)
            else:
                emit = step_outputs
                next_state = step_state

            def _write_step(ta, out):
                return ta.write(time, out)

            outputs_ta = nest.map_structure(_write_step, outputs_ta, emit)

            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_finished, next_sequence_lengths, input_ta,
                    dec_finished_ta, sample_id_ta)
Exemplo n.º 34
0
 def condition(unused_time, unused_outputs_ta, unused_state, unused_inputs,
               finished):
   """Return the negation of the all-reduction of `finished`.

   Only `finished` is read; the remaining loop variables are ignored.
   """
   everything_finished = math_ops.reduce_all(finished)
   return math_ops.logical_not(everything_finished)
def triplet_loss_adapted_from_tf(y_true, y_pred, margin=1.0):
    """Semi-hard triplet loss with a Keras-style `(y_true, y_pred)` signature.

    Adapted from
    `tf.contrib.losses.metric_learning.triplet_semihard_loss`. The labels are
    not taken from `y_true`; they are packed into `y_pred`.

    Args:
      y_true: Ignored; it is deleted immediately.
      y_pred: Tensor indexed as `[row, column]`. Column 0 holds the label of
        each row (cast to int32); the remaining columns hold the embedding.
      margin: Float, margin term in the loss definition. Defaults to 1.0,
        the value that used to be hard-coded.

    Returns:
      Scalar tensor: the sum of the positive part of the masked loss matrix
      divided by the number of positive pairs. If no two rows share a label
      the divisor is zero.
    """
    del y_true  # Labels travel inside `y_pred`.

    labels = tf.cast(y_pred[:, :1], dtype='int32')
    embeddings = y_pred[:, 1:]

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask of negatives that lie farther than each positive.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    # True where at least one such negative exists.
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    # Positive pairs are same-label pairs with the diagonal removed.
    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    semi_hard_triplet_loss_distance = math_ops.truediv(
        math_ops.reduce_sum(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0)),
        num_positives,
        name='triplet_semihard_loss')

    return semi_hard_triplet_loss_distance
Exemplo n.º 36
0
def lifted_struct_loss(labels, embeddings, margin=1.0):
    """Computes the lifted structured loss.

    The loss encourages the positive distances (between a pair of embeddings
    with the same labels) to be smaller than any negative distances (between
    a pair of embeddings with different labels) in the mini-batch in a way
    that is differentiable with respect to the embedding vectors.
    See: https://arxiv.org/abs/1511.06452.

    Args:
      labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
        multiclass integer labels.
      embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
        not be l2 normalized.
      margin: Float, margin term in the loss definition.

    Returns:
      lifted_loss: tf.float32 scalar.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build the pairwise distance matrix. `squared=True` is not passed, so
    # this uses whatever `pairwise_distance` defaults to.
    pairwise_distances = pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    diff = margin - pairwise_distances
    mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    # Safe maximum: Temporarily shift negative distances
    #   above zero before taking max.
    #     this is to take the max only among negatives.
    # `keepdims` replaces the deprecated `keep_dims` keyword.
    row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
    row_negative_maximums = math_ops.reduce_max(
        math_ops.multiply(diff - row_minimums, mask), 1,
        keepdims=True) + row_minimums

    # Matrix of maximums M_ij = max(m_i, m_j), subtracted inside the exp and
    # added back outside the log.
    max_elements = math_ops.maximum(row_negative_maximums,
                                    array_ops.transpose(row_negative_maximums))
    diff_tiled = array_ops.tile(diff, [batch_size, 1])
    mask_tiled = array_ops.tile(mask, [batch_size, 1])
    max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements),
                                          [-1, 1])

    loss_exp_left = array_ops.reshape(
        math_ops.reduce_sum(
            math_ops.multiply(math_ops.exp(diff_tiled - max_elements_vect),
                              mask_tiled),
            1,
            keepdims=True), [batch_size, batch_size])

    loss_mat = max_elements + math_ops.log(loss_exp_left +
                                           array_ops.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    # Positive pairs are same-label pairs with the diagonal removed.
    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = math_ops.reduce_sum(mask_positives) / 2.0

    lifted_loss = math_ops.truediv(
        0.25 * math_ops.reduce_sum(
            math_ops.square(
                math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                                 0.0))),
        num_positives,
        name='liftedstruct_loss')
    return lifted_loss
Exemplo n.º 37
0
def triplet_semihard_loss(labels, embeddings, margin=1.0):
  """Triplet loss using semi-hard negative mining.

  For every anchor/positive pair the loss picks the smallest negative
  distance that is still larger than the positive distance (a semi-hard
  negative). When no such negative exists it falls back to the largest
  negative distance.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """
  # Turn the [batch_size] labels into a [batch_size, 1] column.
  label_shape = array_ops.shape(labels)
  assert label_shape.shape == 1
  labels = array_ops.reshape(labels, [label_shape[0], 1])

  # Pairwise squared distances and same-label / different-label masks.
  sq_dists = pairwise_distance(embeddings, squared=True)
  same_label = math_ops.equal(labels, array_ops.transpose(labels))
  diff_label = math_ops.logical_not(same_label)

  batch_size = array_ops.size(labels)

  # Candidate negatives: different label AND farther than the positive.
  sq_dists_tiled = array_ops.tile(sq_dists, [batch_size, 1])
  diff_label_tiled = array_ops.tile(diff_label, [batch_size, 1])
  positive_dist_column = array_ops.reshape(
      array_ops.transpose(sq_dists), [-1, 1])
  farther_than_positive = math_ops.greater(
      sq_dists_tiled, positive_dist_column)
  candidate_mask = math_ops.logical_and(
      diff_label_tiled, farther_than_positive)

  # True where at least one candidate negative exists.
  candidate_count = math_ops.reduce_sum(
      math_ops.cast(candidate_mask, dtype=dtypes.float32), 1, keepdims=True)
  has_candidate = array_ops.reshape(
      math_ops.greater(candidate_count, 0.0), [batch_size, batch_size])
  has_candidate = array_ops.transpose(has_candidate)

  diff_label_weights = math_ops.cast(diff_label, dtype=dtypes.float32)
  candidate_weights = math_ops.cast(candidate_mask, dtype=dtypes.float32)

  # negatives_outside: smallest D_an where D_an > D_ap.
  smallest_candidate = masked_minimum(sq_dists_tiled, candidate_weights)
  negatives_outside = array_ops.transpose(
      array_ops.reshape(smallest_candidate, [batch_size, batch_size]))

  # negatives_inside: largest D_an.
  largest_negative = masked_maximum(sq_dists, diff_label_weights)
  negatives_inside = array_ops.tile(largest_negative, [1, batch_size])

  semi_hard_negatives = array_ops.where(
      has_candidate, negatives_outside, negatives_inside)

  loss_mat = math_ops.add(margin, sq_dists - semi_hard_negatives)

  # All same-label pairs except the diagonal count as positives (no 0.5
  # upper-triangular factor, unlike lifted-struct).
  positive_pairs = math_ops.cast(
      same_label, dtype=dtypes.float32) - array_ops.diag(
          array_ops.ones([batch_size]))
  num_positives = math_ops.reduce_sum(positive_pairs)

  hinge_total = math_ops.reduce_sum(
      math_ops.maximum(math_ops.multiply(loss_mat, positive_pairs), 0.0))
  return math_ops.truediv(
      hinge_total, num_positives, name='triplet_semihard_loss')
Exemplo n.º 38
0
def lifted_struct_loss(y_true, y_preds, margin=1.0):
    """Lifted structured loss with a `(y_true, y_preds)` signature.

    The loss encourages the positive distances (between a pair of embeddings
    with the same labels) to be smaller than any negative distances (between
    a pair of embeddings with different labels) in the mini-batch in a way
    that is differentiable with respect to the embedding vectors.
    See: https://arxiv.org/abs/1511.06452.

    Args:
      y_true: Labels; reshaped to `[shape(y_true)[0], 1]` before use.
      y_preds: Embedding vectors passed to `pairwise_distance`.
      margin: Float, margin term in the loss definition.

    Returns:
      lifted_loss: scalar tensor named 'liftedstruct_loss'.
    """
    # Turn the labels into a [batch_size, 1] column.
    label_shape = array_ops.shape(y_true)
    labels = array_ops.reshape(y_true, [label_shape[0], 1])

    distances = pairwise_distance(y_preds)

    # Same-label / different-label masks.
    same_label = math_ops.equal(labels, array_ops.transpose(labels))
    diff_label = math_ops.logical_not(same_label)

    batch_size = array_ops.size(labels)

    diff = margin - distances
    negative_weights = math_ops.cast(diff_label, dtype=dtypes.float32)

    # Safe maximum over negatives only: shift each row above zero, mask,
    # take the max, then shift back.
    row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
    shifted_negatives = math_ops.multiply(diff - row_minimums,
                                          negative_weights)
    row_negative_maximums = (
        math_ops.reduce_max(shifted_negatives, 1, keepdims=True) +
        row_minimums)

    # Matrix of maximums M_ij = max(m_i, m_j), where m_i is the max of
    # margin - negative D_i's. This matches the Caffe loss layer at:
    #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long
    max_elements = math_ops.maximum(
        row_negative_maximums, array_ops.transpose(row_negative_maximums))
    diff_tiled = array_ops.tile(diff, [batch_size, 1])
    negative_weights_tiled = array_ops.tile(negative_weights,
                                            [batch_size, 1])
    max_elements_column = array_ops.reshape(
        array_ops.transpose(max_elements), [-1, 1])

    masked_exp = math_ops.multiply(
        math_ops.exp(diff_tiled - max_elements_column),
        negative_weights_tiled)
    loss_exp_left = array_ops.reshape(
        math_ops.reduce_sum(masked_exp, 1, keepdims=True),
        [batch_size, batch_size])

    log_sum = math_ops.log(loss_exp_left +
                           array_ops.transpose(loss_exp_left))
    # Add back the subtracted maximum, then the positive distance.
    loss_mat = max_elements + log_sum + distances

    # Positive pairs are same-label pairs with the diagonal removed.
    positive_pairs = math_ops.cast(
        same_label, dtype=dtypes.float32) - array_ops.diag(
            array_ops.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = math_ops.reduce_sum(positive_pairs) / 2.0

    hinge = math_ops.maximum(math_ops.multiply(loss_mat, positive_pairs),
                             0.0)
    squared_total = math_ops.reduce_sum(math_ops.square(hinge))
    return math_ops.truediv(0.25 * squared_total,
                            num_positives,
                            name='liftedstruct_loss')
Exemplo n.º 39
0
def triplet_semihard_without_grad(embeddings_labels, sigma1=1.0, sigma2=1.0):
    """Compute distance-weighted positive and negative probabilities.

    For every row, same-label (positive) and different-label (negative)
    samples are weighted by `exp(-squared_distance / sigma)` and the weights
    are normalised over the row. A low sigma concentrates the weight on
    nearby samples.

    Args:
      embeddings_labels: Tensor indexed as `[row, column]`. Column 0 holds
        the label of each row (cast to int32); the remaining columns hold
        the embedding.
      sigma1: Scale used for the positive probabilities.
      sigma2: Scale used for the negative probabilities.

    Returns:
      Tuple `(pos_prob, neg_prob)` of row-normalised weight matrices. Rows
      whose normalisation produced NaN are replaced by an indicator of the
      entries equal to the row maximum of the masked distances.
    """
    # First we extract the labels and embeddings.
    labels = tf.cast(embeddings_labels[:, :1], dtype='int32')
    embeddings = embeddings_labels[:, 1:]

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)

    # True where labels are the same / different, respectively.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    adjacency_not = math_ops.logical_not(adjacency)

    # Infer batch size dynamically so an unknown static batch dimension
    # does not break the tiling below.
    batch_size = array_ops.size(labels)

    def _row_probabilities(pair_mask, sigma):
        """Normalise masked affinities per row, with a NaN fallback."""
        # Subtracting the identity removes the self-pair contribution
        # (affinity 1 when the diagonal distance is zero).
        affinity = math_ops.exp(-pdist_matrix / sigma) - array_ops.diag(
            array_ops.ones([batch_size]))
        weighted = math_ops.multiply(pair_mask, affinity)
        prob = math_ops.divide(
            weighted, tf.reduce_sum(weighted, axis=1, keepdims=True))

        # A row whose weights sum to zero divides 0/0 and becomes NaN.
        row_is_nan = tf.tile(
            tf.math.is_nan(tf.reduce_sum(prob, axis=1, keepdims=True)),
            [1, batch_size])

        # NOTE(review): the original comments call this the "nearest"
        # sample, but the code selects entries equal to the row MAXIMUM of
        # the masked distances; that selection is kept unchanged.
        masked_dist = math_ops.multiply(pair_mask, pdist_matrix)
        fallback = tf.cast(tf.math.equal(
            masked_dist, tf.reduce_max(masked_dist, axis=1, keepdims=True)),
                           dtype=dtypes.float32)
        return array_ops.where(row_is_nan, fallback, prob)

    pos_prob = _row_probabilities(
        math_ops.cast(adjacency, dtype=dtypes.float32), sigma1)
    # The negative branch uses the different-label mask throughout; it used
    # to reuse the positive mask and positive distances for its fallback.
    neg_prob = _row_probabilities(
        math_ops.cast(adjacency_not, dtype=dtypes.float32), sigma2)

    return pos_prob, neg_prob
Exemplo n.º 40
0
def _confusion_matrix_at_thresholds(labels,
                                    predictions,
                                    thresholds,
                                    weights=None):
    """Builds per-threshold confusion-matrix sums for `predictions`.

    Args:
      labels: Tensor of ground-truth values; it is cast to `bool`.
      predictions: Tensor of scores. Asserted to lie in `[0, 1]` and cast to
        float before use.
      thresholds: Python sequence of thresholds (its `len` is taken and it is
        turned into a constant).
      weights: Optional weights, broadcast against `predictions`.

    Returns:
      A dict with keys 'tp', 'fn', 'tn' and 'fp'. Each value is the sum along
      axis 1 of the matching (optionally weighted) indicator matrix, which has
      one row per threshold.
    """
    range_checks = [
        check_ops.assert_greater_equal(
            predictions,
            math_ops.cast(0.0, dtype=predictions.dtype),
            message='predictions must be in [0, 1]'),
        check_ops.assert_less_equal(
            predictions,
            math_ops.cast(1.0, dtype=predictions.dtype),
            message='predictions must be in [0, 1]'),
    ]
    with ops.control_dependencies(range_checks):
        predictions, labels, weights = _remove_squeezable_dimensions(
            predictions=math_ops.to_float(predictions),
            labels=math_ops.cast(labels, dtype=dtypes.bool),
            weights=weights)

    threshold_count = len(thresholds)
    per_threshold = [threshold_count, 1]

    # Predictions become a column, labels a row.
    preds_column = array_ops.reshape(predictions, [-1, 1])
    labels_row = array_ops.reshape(
        math_ops.cast(labels, dtype=dtypes.bool), [1, -1])

    # Prefer the static prediction count; fall back to the dynamic one.
    prediction_count = preds_column.get_shape().as_list()[0]
    if prediction_count is None:
        prediction_count = array_ops.shape(preds_column)[0]

    thresh_tiled = array_ops.tile(
        array_ops.expand_dims(array_ops.constant(thresholds), [1]),
        array_ops.stack([1, prediction_count]))

    # One row per threshold: compare every prediction with that threshold.
    pred_is_pos = math_ops.greater(
        array_ops.tile(array_ops.transpose(preds_column), per_threshold),
        thresh_tiled)
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    label_is_pos = array_ops.tile(labels_row, per_threshold)
    label_is_neg = math_ops.logical_not(label_is_pos)

    weights_tiled = None
    if weights is not None:
        weights = weights_broadcast_ops.broadcast_weights(
            math_ops.to_float(weights), predictions)
        weights_tiled = array_ops.tile(
            array_ops.reshape(weights, [1, -1]), per_threshold)
        thresh_tiled.get_shape().assert_is_compatible_with(
            weights_tiled.get_shape())

    def _weighted_count(label_mask, pred_mask):
        # Float indicator of "both masks true", weighted when weights exist,
        # then summed over the prediction axis.
        indicator = math_ops.to_float(
            math_ops.logical_and(label_mask, pred_mask))
        if weights_tiled is not None:
            indicator = indicator * weights_tiled
        return math_ops.reduce_sum(indicator, 1)

    cells = (
        ('tp', label_is_pos, pred_is_pos),
        ('fn', label_is_pos, pred_is_neg),
        ('tn', label_is_neg, pred_is_neg),
        ('fp', label_is_neg, pred_is_pos),
    )
    return {
        key: _weighted_count(label_mask, pred_mask)
        for key, label_mask, pred_mask in cells
    }
Exemplo n.º 41
0
def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
  """Assert that `x == y` holds for every (possibly broadcast) element pair.

  Typical use as a dependency of another operation:

  ```python
  with tf.control_dependencies([tf.assert_equal(x, y)]):
    output = tf.reduce_sum(x)
  ```

  The condition is satisfied when `x[i] == y[i]` for all pairs of elements.
  Two empty tensors trivially satisfy it.

  Args:
    x:  Numeric `Tensor`.
    y:  Numeric `Tensor`, same dtype as and broadcastable to `x`.
    data:  Tensors to print when the condition is False.  Defaults to the
      error message and the first few entries of `x` and `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    name: A name for this operation (optional).  Defaults to "assert_equal".

  Returns:
    Op that raises `InvalidArgumentError` if `x == y` is False.
    @compatibility{eager} returns None

  Raises:
    InvalidArgumentError: if the check can be performed immediately and
      `x == y` is False. That is the case during eager execution, or when
      `x` and `y` are statically known.
  """
  message = message or ''
  with ops.name_scope(name, 'assert_equal', [x, y, data]):
    x = ops.convert_to_tensor(x, name='x')
    y = ops.convert_to_tensor(y, name='y')

    if not context.executing_eagerly():
      # Graph mode: build the Assert op, failing early when both operands
      # are compile-time constants that already differ.
      if data is None:
        data = [
            message,
            'Condition x == y did not hold element-wise:',
            'x (%s) = ' % x.name, x,
            'y (%s) = ' % y.name, y
        ]
      condition = math_ops.reduce_all(math_ops.equal(x, y))
      x_static = tensor_util.constant_value(x)
      y_static = tensor_util.constant_value(y)
      if x_static is not None and y_static is not None:
        _assert_static((x_static == y_static).all(), data)
      return control_flow_ops.Assert(condition, data, summarize=summarize)

    # Eager mode: evaluate right away and raise on mismatch.
    elementwise_eq = math_ops.equal(x, y)
    if math_ops.reduce_all(elementwise_eq):
      return None

    # Same default of 3 printed elements as control_flow_ops.Assert, which
    # the graph-mode path uses.
    summarize = 3 if summarize is None else summarize

    head_msg = ''
    if summarize:
      # reshape((-1,)) is the fastest way to get a flat array view.
      flat_x = x.numpy().reshape((-1,))
      flat_y = y.numpy().reshape((-1,))
      shown_x = min(flat_x.size, summarize)
      shown_y = min(flat_y.size, summarize)
      head_msg = ('First %d elements of x:\n%s\n'
                  'First %d elements of y:\n%s\n' %
                  (shown_x, flat_x[:shown_x],
                   shown_y, flat_y[:shown_y]))

    diff_msg = ''
    if x.shape == y.shape:
      # Only report differing positions when shapes match; with different
      # shapes this information is more confusing than useful.
      differs = math_ops.logical_not(elementwise_eq)
      diff_indices = array_ops.where(differs).numpy()
      differing_x = array_ops.boolean_mask(x, differs)
      differing_y = array_ops.boolean_mask(y, differs)
      summarize = min(summarize, diff_indices.shape[0])
      diff_msg = (
          'Indices of first %s different values:\n%s\n'
          'Corresponding x values:\n%s\n'
          'Corresponding y values:\n%s\n' %
          (summarize, diff_indices[:summarize],
           differing_x.numpy().reshape((-1,))[:summarize],
           differing_y.numpy().reshape((-1,))[:summarize]))

    raise errors.InvalidArgumentError(
        node_def=None, op=None,
        message=('%s\nCondition x == y did not hold.\n%s%s' %
                 (message, diff_msg, head_msg)))
Exemplo n.º 42
0
  def test_sweeps(self):
    """Checks that `_SweepHook` runs init/prep/train ops and switches sweeps."""
    # Flags handed to the hook.
    row_sweep_flag = variables.Variable(True)
    sweep_done_flag = variables.Variable(False)
    # Markers flipped to True by the ops handed to the hook.
    init_marker = variables.Variable(False)
    row_prep_marker = variables.Variable(False)
    col_prep_marker = variables.Variable(False)
    row_train_marker = variables.Variable(False)
    col_train_marker = variables.Variable(False)

    mark_init = state_ops.assign(init_marker, True)
    mark_row_prep = state_ops.assign(row_prep_marker, True)
    mark_col_prep = state_ops.assign(col_prep_marker, True)
    mark_row_train = state_ops.assign(row_train_marker, True)
    mark_col_train = state_ops.assign(col_train_marker, True)
    noop_train = control_flow_ops.no_op()
    flip_sweep = control_flow_ops.group(
        state_ops.assign(sweep_done_flag, False),
        state_ops.assign(row_sweep_flag,
                         math_ops.logical_not(row_sweep_flag)))
    finish_sweep = state_ops.assign(sweep_done_flag, True)

    with self.test_session() as sess:
      hook = wals_lib._SweepHook(
          row_sweep_flag,
          sweep_done_flag,
          mark_init,
          [mark_row_prep],
          [mark_col_prep],
          mark_row_train,
          mark_col_train,
          flip_sweep)
      hooked_sess = monitored_session._HookedSession(sess, [hook])
      sess.run([variables.global_variables_initializer()])

      def expect_true(flag, failure_msg):
        self.assertTrue(sess.run(flag), msg=failure_msg)

      def expect_false(flag, failure_msg):
        self.assertFalse(sess.run(flag), msg=failure_msg)

      # First row sweep.
      hooked_sess.run(noop_train)
      expect_true(init_marker, 'init op not run by the Sweephook')
      expect_true(row_prep_marker, 'row_prep_op not run by the SweepHook')
      expect_true(row_train_marker, 'row_train_op not run by the SweepHook')
      expect_true(
          row_sweep_flag,
          'Row sweep is not complete but is_row_sweep_var is False.')

      # Column sweep, entered after marking the row sweep as done.
      hooked_sess.run(finish_sweep)
      hooked_sess.run(noop_train)
      expect_true(col_prep_marker, 'col_prep_op not run by the SweepHook')
      expect_true(col_train_marker, 'col_train_op not run by the SweepHook')
      expect_false(
          row_sweep_flag,
          'Col sweep is not complete but is_row_sweep_var is True.')

      # Back to a row sweep, after marking the column sweep as done.
      hooked_sess.run(finish_sweep)
      hooked_sess.run(noop_train)
      expect_true(
          row_sweep_flag,
          'Col sweep is complete but is_row_sweep_var is False.')
 def __invert__(self):
   """Implements the unary `~` operator as `math_ops.logical_not(self)`."""
   # ops.Tensor used math_ops.logical_not as of August 2017.
   # Now that bitwise_ops.invert exists, it might make sense
   # for both ops.Tensor and TensorNode to use that if the
   # type is compatible.
   return math_ops.logical_not(self)
Exemplo n.º 44
0
        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """Runs a single decoder step as the while_loop body.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of
                finish).

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths)`.
            """
            step_outputs, step_state, next_inputs, step_finished = (
                decoder.step(time, inputs, state))

            # A decoder that tracks its own finished flags may also expose
            # `lengths` on its state; when present they replace ours.
            state_lengths = None
            if decoder.tracks_own_finished:
                next_finished = step_finished
                state_lengths = getattr(step_state, "lengths", None)
            else:
                next_finished = math_ops.logical_or(step_finished, finished)

            if state_lengths is not None:
                # Just pass something through the loop; the next iteration
                # reads the lengths from the decoder state again.
                next_sequence_lengths = math_ops.cast(state_lengths,
                                                      dtypes.int32)
            else:
                # Entries not yet finished get stamped with `time + 1`.
                still_running = math_ops.logical_not(finished)
                stamped = array_ops.fill(array_ops.shape(sequence_lengths),
                                         time + 1)
                next_sequence_lengths = array_ops.where(
                    still_running, stamped, sequence_lengths)

            nest.assert_same_structure(state, step_state)
            nest.assert_same_structure(outputs_ta, step_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            # Outputs of already-finished entries are replaced by zeros.
            if impute_finished:
                emit = nest.map_structure(
                    lambda out, zero: array_ops.where(finished, zero, out),
                    step_outputs, zero_outputs)
            else:
                emit = step_outputs

            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states are passed through as-is;
                # everything else keeps `cur` where `finished` is set.
                if isinstance(cur, tensor_array_ops.TensorArray):
                    return new
                new.set_shape(cur.shape)
                if new.shape.ndims == 0:
                    return new
                return array_ops.where(finished, cur, new)

            if impute_finished:
                next_state = nest.map_structure(_maybe_copy_state,
                                                step_state, state)
            else:
                next_state = step_state

            outputs_ta = nest.map_structure(
                lambda ta, out: ta.write(time, out), outputs_ta, emit)
            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_finished, next_sequence_lengths)
Exemplo n.º 45
0
def _wals_factorization_model_function(features, labels, mode, params):
  """Model function for the WALSFactorization estimator.

  Builds a `factorization_ops.WALSModel` from `params` and returns the
  `ModelFnOps` for the requested mode. In TRAIN mode the returned `train_op`
  is a no-op: the row/column update ops are handed to a `_SweepHook` that is
  returned in `training_hooks`.

  Args:
    features: Dictionary of features. See WALSMatrixFactorization. This
      function reads the keys `WALSMatrixFactorization.INPUT_ROWS` and
      `INPUT_COLS` in every mode, `PROJECT_ROW` in INFER and EVAL mode, and
      the optional `PROJECTION_WEIGHTS` in INFER mode.
    labels: Must be None.
    mode: A model_fn.ModeKeys object.
    params: Dictionary of parameters containing arguments passed to the
      WALSMatrixFactorization constructor.

  Returns:
    A ModelFnOps object.

  Raises:
    AssertionError: If `labels` is not None.
    ValueError: If `mode` is not recognized.
  """
  assert labels is None
  # Both caches are only enabled when training.
  use_factors_weights_cache = (params["use_factors_weights_cache_for_training"]
                               and mode == model_fn.ModeKeys.TRAIN)
  use_gramian_cache = (params["use_gramian_cache_for_training"] and
                       mode == model_fn.ModeKeys.TRAIN)
  max_sweeps = params["max_sweeps"]
  model = factorization_ops.WALSModel(
      params["num_rows"],
      params["num_cols"],
      params["embedding_dimension"],
      unobserved_weight=params["unobserved_weight"],
      regularization=params["regularization_coeff"],
      row_init=params["row_init"],
      col_init=params["col_init"],
      num_row_shards=params["num_row_shards"],
      num_col_shards=params["num_col_shards"],
      row_weights=params["row_weights"],
      col_weights=params["col_weights"],
      use_factors_weights_cache=use_factors_weights_cache,
      use_gramian_cache=use_gramian_cache)

  # Get input rows and cols. We either update rows or columns depending on
  # the value of row_sweep, which is maintained using a session hook.
  input_rows = features[WALSMatrixFactorization.INPUT_ROWS]
  input_cols = features[WALSMatrixFactorization.INPUT_COLS]

  # TRAIN mode:
  if mode == model_fn.ModeKeys.TRAIN:
    # Training consists of the following ops (controlled using a SweepHook).
    # Before a row sweep:
    #   row_update_prep_gramian_op
    #   initialize_row_update_op
    # During a row sweep:
    #   update_row_factors_op
    # Before a col sweep:
    #   col_update_prep_gramian_op
    #   initialize_col_update_op
    # During a col sweep:
    #   update_col_factors_op

    # Book-keeping state, all non-trainable global variables.
    is_row_sweep_var = variable_scope.variable(
        True,
        trainable=False,
        name="is_row_sweep",
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    is_sweep_done_var = variable_scope.variable(
        False,
        trainable=False,
        name="is_sweep_done",
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    completed_sweeps_var = variable_scope.variable(
        0,
        trainable=False,
        name=WALSMatrixFactorization.COMPLETED_SWEEPS,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    loss_var = variable_scope.variable(
        0.,
        trainable=False,
        name=WALSMatrixFactorization.LOSS,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    # The root weighted squared error =
    #   \\(\sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij )\\)
    rwse_var = variable_scope.variable(
        0.,
        trainable=False,
        name=WALSMatrixFactorization.RWSE,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])

    summary.scalar("loss", loss_var)
    summary.scalar("root_weighted_squared_error", rwse_var)
    summary.scalar("completed_sweeps", completed_sweeps_var)

    def create_axis_ops(sp_input, num_items, update_fn, axis_name):
      """Creates book-keeping and training ops for a given axis.

      Args:
        sp_input: A SparseTensor corresponding to the row or column batch.
        num_items: An integer, the total number of items of this axis.
        update_fn: A function that takes one argument (`sp_input`), and that
        returns a tuple of
          * new_factors: A float Tensor of the factor values after update.
          * update_op: a TensorFlow op which updates the factors.
          * loss: A float Tensor, the unregularized loss.
          * reg_loss: A float Tensor, the regularization loss.
          * sum_weights: A float Tensor, the sum of factor weights.
        axis_name: A string that specifies the name of the axis.

      Returns:
        A tuple consisting of:
          * reset_processed_items_op: A TensorFlow op, to be run before the
            beginning of any sweep. It marks all items as not-processed.
          * axis_train_op: A Tensorflow op, to be run during this axis' sweeps.
      """
      # One boolean per item of this axis, initially all False.
      processed_items_init = array_ops.fill(dims=[num_items], value=False)
      with ops.colocate_with(processed_items_init):
        processed_items = variable_scope.variable(
            processed_items_init,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES],
            trainable=False,
            name="processed_" + axis_name)
      _, update_op, loss, reg, sum_weights = update_fn(sp_input)
      # First column of the sparse indices; presumably the id of the row/col
      # each entry of the batch belongs to -- TODO confirm against callers.
      input_indices = sp_input.indices[:, 0]
      # The factor update and the loss / RWSE assignments are control
      # dependencies of the book-keeping ops created below.
      with ops.control_dependencies([
          update_op,
          state_ops.assign(loss_var, loss + reg),
          state_ops.assign(rwse_var, math_ops.sqrt(loss / sum_weights))]):
        with ops.colocate_with(processed_items):
          # Flag every item referenced by this batch as processed.
          update_processed_items = state_ops.scatter_update(
              processed_items,
              input_indices,
              array_ops.ones_like(input_indices, dtype=dtypes.bool),
              name="update_processed_{}_indices".format(axis_name))
        with ops.control_dependencies([update_processed_items]):
          # The sweep is done once every item has been flagged.
          is_sweep_done = math_ops.reduce_all(processed_items)
          # Casting the boolean to int32 adds 1 to the sweep counter when the
          # sweep is done and 0 otherwise.
          axis_train_op = control_flow_ops.group(
              state_ops.assign(is_sweep_done_var, is_sweep_done),
              state_ops.assign_add(
                  completed_sweeps_var,
                  math_ops.cast(is_sweep_done, dtypes.int32)),
              name="{}_sweep_train_op".format(axis_name))
      # The variable's initializer doubles as the reset op.
      return processed_items.initializer, axis_train_op

    reset_processed_rows_op, row_train_op = create_axis_ops(
        input_rows,
        params["num_rows"],
        lambda x: model.update_row_factors(sp_input=x, transpose_input=False),
        "rows")
    reset_processed_cols_op, col_train_op = create_axis_ops(
        input_cols,
        params["num_cols"],
        lambda x: model.update_col_factors(sp_input=x, transpose_input=True),
        "cols")
    # Toggles between row and column sweeps and resets both processed-item
    # vectors.
    switch_op = control_flow_ops.group(
        state_ops.assign(
            is_row_sweep_var, math_ops.logical_not(is_row_sweep_var)),
        reset_processed_rows_op,
        reset_processed_cols_op,
        name="sweep_switch_op")
    row_prep_ops = [
        model.row_update_prep_gramian_op, model.initialize_row_update_op]
    col_prep_ops = [
        model.col_update_prep_gramian_op, model.initialize_col_update_op]
    init_op = model.worker_init
    sweep_hook = _SweepHook(
        is_row_sweep_var, is_sweep_done_var, init_op,
        row_prep_ops, col_prep_ops, row_train_op, col_train_op, switch_op)
    global_step_hook = _IncrementGlobalStepHook()
    training_hooks = [sweep_hook, global_step_hook]
    # A sweep limit is only installed when `max_sweeps` was provided.
    if max_sweeps is not None:
      training_hooks.append(_StopAtSweepHook(max_sweeps))

    # train_op is a no-op here; the row/col train ops were handed to the
    # sweep hook above.
    return model_fn.ModelFnOps(
        mode=model_fn.ModeKeys.TRAIN,
        predictions={},
        loss=loss_var,
        eval_metric_ops={},
        train_op=control_flow_ops.no_op(),
        training_hooks=training_hooks)

  # INFER mode
  elif mode == model_fn.ModeKeys.INFER:
    # Optional feature: `.get` yields None when it is absent.
    projection_weights = features.get(
        WALSMatrixFactorization.PROJECTION_WEIGHTS)

    def get_row_projection():
      return model.project_row_factors(
          sp_input=input_rows,
          projection_weights=projection_weights,
          transpose_input=False)

    def get_col_projection():
      return model.project_col_factors(
          sp_input=input_cols,
          projection_weights=projection_weights,
          transpose_input=True)

    # PROJECT_ROW selects between the row and the column projection.
    predictions = {
        WALSMatrixFactorization.PROJECTION_RESULT: control_flow_ops.cond(
            features[WALSMatrixFactorization.PROJECT_ROW],
            get_row_projection,
            get_col_projection)
    }

    return model_fn.ModelFnOps(
        mode=model_fn.ModeKeys.INFER,
        predictions=predictions,
        loss=None,
        eval_metric_ops={},
        train_op=control_flow_ops.no_op(),
        training_hooks=[])

  # EVAL mode
  elif mode == model_fn.ModeKeys.EVAL:
    # Only the loss and regularization outputs of the update functions are
    # used here; the returned update op is discarded.
    def get_row_loss():
      _, _, loss, reg, _ = model.update_row_factors(
          sp_input=input_rows, transpose_input=False)
      return loss + reg
    def get_col_loss():
      _, _, loss, reg, _ = model.update_col_factors(
          sp_input=input_cols, transpose_input=True)
      return loss + reg
    # PROJECT_ROW selects between the row and the column loss.
    loss = control_flow_ops.cond(
        features[WALSMatrixFactorization.PROJECT_ROW],
        get_row_loss,
        get_col_loss)
    return model_fn.ModelFnOps(
        mode=model_fn.ModeKeys.EVAL,
        predictions={},
        loss=loss,
        eval_metric_ops={},
        train_op=control_flow_ops.no_op(),
        training_hooks=[])

  else:
    raise ValueError("mode=%s is not recognized." % str(mode))
Exemplo n.º 46
0
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None):
  """Returns op to update the given confusion matrix variables.

  Each (y_true, y_pred) pair is classified, per threshold, as:

  true_positive: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positive: y_true == False and y_pred > thresholds

  The (optionally weighted) counts are summed per threshold and added to the
  corresponding variables, which makes the returned op suitable for
  estimating these metrics over a stream of data.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).

  Returns:
    Update op, or None when `variables_to_update` is None.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
  if variables_to_update is None:
    return None
  y_true = ops.convert_to_tensor(y_true)
  y_pred = ops.convert_to_tensor(y_pred)
  y_pred.shape.assert_is_compatible_with(y_true.shape)

  # Validate the requested keys against the ConfusionMatrix members.
  known_keys = list(ConfusionMatrix)
  recognized = [key for key in variables_to_update if key in known_keys]
  if not any(recognized):
    raise ValueError(
        'Please provide at least one valid confusion matrix '
        'variable to update. Valid variable key options are: "{}". '
        'Received: "{}"'.format(known_keys, variables_to_update.keys()))

  unrecognized = [key for key in variables_to_update if key not in known_keys]
  if unrecognized:
    raise ValueError(
        'Invalid keys: {}. Valid variable key options are: "{}"'.format(
            unrecognized, known_keys))

  range_checks = [
      check_ops.assert_greater_equal(
          y_pred,
          math_ops.cast(0.0, dtype=y_pred.dtype),
          message='predictions must be >= 0'),
      check_ops.assert_less_equal(
          y_pred,
          math_ops.cast(1.0, dtype=y_pred.dtype),
          message='predictions must be <= 1'),
  ]
  with ops.control_dependencies(range_checks):
    y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
        math_ops.cast(y_pred, dtype=dtypes.float32),
        math_ops.cast(y_true, dtype=dtypes.bool), sample_weight)

  if top_k is not None:
    y_pred = _filter_top_k(y_pred, top_k)
  if class_id is not None:
    y_true = y_true[..., class_id]
    y_pred = y_pred[..., class_id]

  thresholds = to_list(thresholds)
  threshold_count = len(thresholds)
  prediction_count = array_ops.size(y_pred)
  per_threshold = [threshold_count, 1]

  # Flatten predictions and labels into single rows.
  preds_row = array_ops.reshape(y_pred, [1, -1])
  labels_row = array_ops.reshape(
      math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

  # One row per threshold, one column per prediction.
  thresh_tiled = array_ops.tile(
      array_ops.expand_dims(array_ops.constant(thresholds), 1),
      array_ops.stack([1, prediction_count]))
  preds_tiled = array_ops.tile(preds_row, per_threshold)
  pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)
  label_is_pos = array_ops.tile(labels_row, per_threshold)

  weights_tiled = None
  if sample_weight is not None:
    weights = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
    weights_tiled = array_ops.tile(
        array_ops.reshape(weights, [1, -1]), per_threshold)

  def _accumulate(label_mask, pred_mask, var):
    # Adds the (weighted) per-threshold count of "both masks true" to `var`.
    hits = math_ops.cast(
        math_ops.logical_and(label_mask, pred_mask), dtype=dtypes.float32)
    if weights_tiled is not None:
      hits = hits * weights_tiled
    return state_ops.assign_add(var, math_ops.reduce_sum(hits, 1))

  want_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
  want_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
  want_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

  # The negated masks are only built when a requested cell needs them.
  candidates = [(ConfusionMatrix.TRUE_POSITIVES, label_is_pos, pred_is_pos)]
  if want_fn or want_tn:
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    candidates.append(
        (ConfusionMatrix.FALSE_NEGATIVES, label_is_pos, pred_is_neg))
  if want_fp or want_tn:
    label_is_neg = math_ops.logical_not(label_is_pos)
    candidates.append(
        (ConfusionMatrix.FALSE_POSITIVES, label_is_neg, pred_is_pos))
    if want_tn:
      candidates.append(
          (ConfusionMatrix.TRUE_NEGATIVES, label_is_neg, pred_is_neg))

  update_ops = [
      _accumulate(label_mask, pred_mask, variables_to_update[cell])
      for cell, label_mask, pred_mask in candidates
      if cell in variables_to_update
  ]
  return control_flow_ops.group(update_ops)
def triplet_semihard_loss(embeddings, labels, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

    The loss encourages the positive distances (between a pair of embeddings
    with the same labels) to be smaller than the minimum negative distance
    among which are at least greater than the positive distance plus the
    margin constant (called semi-hard negative) in the mini-batch. If no such
    negative exists, uses the largest negative distance instead.
    See: https://arxiv.org/abs/1503.03832.

    Args:
      embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
        multiclass integer labels.
      margin: Selects how the per-pair difference
        `d(anchor, positive) - d(anchor, semi-hard negative)` is turned into
        a loss value:
          * a real number: `max(diff + margin, 0)`;
          * `'soft'`: `softplus(diff)`;
          * `'none'` (any letter case): the raw `diff`.

    Returns:
      triplet_loss: 1-D float `Tensor` with one loss value per
        (anchor, positive) pair, i.e. per off-diagonal entry of the label
        adjacency matrix.

    Raises:
      NotImplementedError: If `margin` is none of the supported values.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask: for every (anchor, positive) pair, is there an
    # element with a different label that is farther away than the positive?
    # If yes, a semi-hard negative exists for that pair.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))

    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(tf.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=dtypes.float32)
    mask = tf.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])

    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    # Resolve the margin mode once. Only strings are lower-cased, so an
    # unsupported non-string margin raises NotImplementedError instead of
    # AttributeError.
    use_hinge = isinstance(margin, numbers.Real)
    use_softplus = not use_hinge and margin == 'soft'
    use_raw = (not use_hinge and isinstance(margin, str) and
               margin.lower() == 'none')
    if not (use_hinge or use_softplus or use_raw):
        raise NotImplementedError(
            'The margin {} is not implemented in triplet_semihard_loss'.format(
                margin))

    loss_mat = pdist_matrix - semi_hard_negatives
    if use_hinge:
        loss_mat = loss_mat + margin

    # Positive pairs: same label, excluding each element paired with itself.
    mask_positives = tf.cast(adjacency, dtype=dtypes.float32) - array_ops.diag(
        array_ops.ones([batch_size]))
    pair_losses = tf.boolean_mask(loss_mat, tf.cast(mask_positives, tf.bool))

    if use_hinge:
        pair_losses = math_ops.maximum(pair_losses, 0.0)
    elif use_softplus:
        pair_losses = tf.nn.softplus(pair_losses)
    # use_raw: the differences are returned without any activation.

    assert_op = tf.Assert(tf.equal(tf.rank(pair_losses), 1),
                          ['Rank of image must be equal to 1.'])
    # A control dependency only attaches to ops created inside the context,
    # so an identity op is needed for the assertion to take effect.
    with tf.control_dependencies([assert_op]):
        triplet_loss = tf.identity(pair_losses)

    return triplet_loss
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight):
    """Performs a single step of Beam Search Decoding.

    Scores every (beam, token) continuation, keeps the top `beam_width`
    candidates per batch entry, and gathers the log probs, lengths, finished
    flags, accumulated attention and cell state belonging to the selected
    beams.

    Args:
      time: Beam search time step, should start at 0. At time 0 we assume
        that all beams are equal and consider only the first beam for
        continuations.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search.
        An instance of `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int.  The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of source
        sentence. Disabled with 0.0.

    Returns:
      A tuple `(output, next_state)` where `output` is a
      `BeamSearchDecoderOutput` holding the selected scores, predicted token
      ids and parent beam ids, and `next_state` is the updated
      `BeamSearchDecoderState`.
    """
    # Static (graph-construction time) batch size, or None if it is dynamic;
    # only used below to set static shapes on the top_k outputs.
    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished
    not_finished = math_ops.logical_not(previously_finished)

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = array_ops.expand_dims(beam_state.log_probs,
                                        2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    # Prefer the static vocabulary size; fall back to the dynamic shape.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    # One-hot over the vocabulary that is 0 at `end_token` and 1 everywhere
    # else, i.e. every continuation except `end_token` adds one to the length.
    lengths_to_add = array_ops.one_hot(indices=array_ops.fill(
        [batch_size, beam_width], end_token),
                                       depth=vocab_size,
                                       on_value=np.int64(0),
                                       off_value=np.int64(1),
                                       dtype=dtypes.int64)
    # Beams that were already finished do not grow.
    add_mask = math_ops.to_int64(not_finished)
    lengths_to_add *= array_ops.expand_dims(add_mask, 2)
    new_prediction_lengths = (lengths_to_add +
                              array_ops.expand_dims(prediction_lengths, 2))

    # Calculate the accumulated attention probabilities if coverage penalty is
    # enabled.
    accumulated_attention_probs = None
    attention_probs = get_attention_probs(next_cell_state,
                                          coverage_penalty_weight)
    if attention_probs is not None:
        # Zero the attention contribution of beams that were already finished.
        attention_probs *= array_ops.expand_dims(
            math_ops.to_float(not_finished), 2)
        accumulated_attention_probs = (beam_state.accumulated_attention_probs +
                                       attention_probs)

    # Calculate the scores for each beam
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight,
        coverage_penalty_weight=coverage_penalty_weight,
        finished=previously_finished,
        accumulated_attention_probs=accumulated_attention_probs)

    # NOTE(review): `time` is converted here but not read again in this
    # function -- confirm whether the conversion is still needed.
    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    # Flatten the (beam, vocab) axes so top_k ranks all continuations of a
    # batch entry together.
    scores_flat = array_ops.reshape(scores, [batch_size, -1])

    # Pick the next beams according to the specified successors function
    next_beam_size = ops.convert_to_tensor(beam_width,
                                           dtype=dtypes.int32,
                                           name="beam_width")
    next_beam_scores, word_indices = nn_ops.top_k(scores_flat,
                                                  k=next_beam_size)

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen predictions
    next_beam_probs = _tensor_gather_helper(gather_indices=word_indices,
                                            gather_from=total_probs,
                                            batch_size=batch_size,
                                            range_size=beam_width * vocab_size,
                                            gather_shape=[-1],
                                            name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices,
                                     vocab_size,
                                     name="next_beam_word_ids")
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    # Flat index // vocab_size recovers the parent beam of each selection.
    # NOTE(review): this uses `/` on integer indices; presumably it relies on
    # to_int32 truncating the quotient -- confirm the division semantics in
    # effect for this module.
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Append new ids to current predictions
    # Re-order the finished flags so they follow the selected parent beams.
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(previously_finished,
                                        math_ops.equal(next_word_ids,
                                                       end_token),
                                        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = math_ops.to_int64(
        math_ops.logical_not(previously_finished))
    next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                                gather_from=beam_state.lengths,
                                                batch_size=batch_size,
                                                range_size=beam_width,
                                                gather_shape=[-1])
    next_prediction_len += lengths_to_add
    # Stays an empty tuple when no attention probabilities are tracked.
    next_accumulated_attention_probs = ()
    if accumulated_attention_probs is not None:
        next_accumulated_attention_probs = _tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=accumulated_attention_probs,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1],
            name="next_accumulated_attention_probs")

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished,
        accumulated_attention_probs=next_accumulated_attention_probs)

    output = BeamSearchDecoderOutput(scores=next_beam_scores,
                                     predicted_ids=next_word_ids,
                                     parent_ids=next_beam_ids)

    return output, next_state
Exemplo n.º 49
0
        def body(time, outputs_ta, state, inputs, finished, sequence_lengths,
                 hypotheses, input_ids, scores, base_index):
            """Internal while_loop body.

            Runs one decoder step, writes its output, rebuilds the hypotheses
            decoded so far, optionally applies the repetition penalty, and
            appends every beam that finished at this step to the
            `hypotheses` / `input_ids` / `scores` TensorArrays.

            Reads `decoder`, `impute_finished`, `zero_outputs`, `repetition`,
            `maximum_iterations`, `parallel_iterations` and `swap_memory`
            from the enclosing scope.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of finish).
              hypotheses: structure of TensorArray (stores hypotheses so far).
              input_ids: structure of TensorArray.
              scores: structure of TensorArray.
              base_index: int32 tensor (keeping track of size of the above 3
                TensorArrays).

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths, new_hypotheses, new_input_ids,
                new_scores, new_base)`.
            """
            (next_outputs, decoder_state, next_inputs,
             decoder_finished) = decoder.step(time, inputs, state)
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
            else:
                next_finished = math_ops.logical_or(decoder_finished, finished)
            # Entries that are still running get the current step count as
            # their length; `(not decoder._use_go_tokens)` is a Python bool
            # that contributes 0 or 1 to the sum.
            next_sequence_lengths = array_ops.where(
                math_ops.logical_not(next_finished),
                array_ops.fill(array_ops.shape(sequence_lengths),
                               time + 1 + (not decoder._use_go_tokens)),
                sequence_lengths)

            nest.assert_same_structure(state, decoder_state)
            nest.assert_same_structure(outputs_ta, next_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            # Zero out output values past finish
            if impute_finished:
                emit = nest.map_structure(
                    lambda out, zero: array_ops.where(
                        next_finished, zero, out), next_outputs, zero_outputs)
            else:
                emit = next_outputs

            # Copy through states past finish
            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states get passed through.
                if isinstance(cur, tensor_array_ops.TensorArray):
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = (new.shape.ndims == 0)
                # Uses `finished` from before this step, not `next_finished`.
                return new if pass_through else array_ops.where(
                    finished, cur, new)

            # Write this step's output; the slot is shifted by one when go
            # tokens are not used.
            outputs_ta = nest.map_structure(
                lambda ta, out: ta.write(time +
                                         (not decoder._use_go_tokens), out),
                outputs_ta, emit)

            # Extract hypotheses, scores for reference
            outputs_so_far = nest.map_structure(lambda ta: ta.stack(),
                                                outputs_ta)
            parent_ids = outputs_so_far.parent_ids
            hypotheses_so_far = outputs_so_far.predicted_ids
            forward_scores = next_outputs.scores

            # Same length for every batch entry: the number of steps so far.
            sl = tf.ones([decoder.batch_size], tf.int32) * \
                (time + 1 + (not decoder._use_go_tokens))
            # Follow the parent pointers to recover the full token sequences.
            hypotheses_so_far = beam_search_ops.gather_tree(
                hypotheses_so_far,
                parent_ids,
                max_sequence_lengths=sl,
                end_token=decoder._end_token)

            # Add repetition penalty
            if repetition != 0:

                def unique_counter(x):
                    # Number of distinct values in `x`, as a float32 scalar.
                    return tf.cast(tf.size(tf.unique(x)[0]), tf.float32)

                def wrapper_rep_penalty_function(sentence):
                    def first_time_penalty():
                        total_unique_words = unique_counter(sentence)
                        return tf.log(total_unique_words)

                    def generic_penalty():
                        # Change in log(unique token count) caused by the
                        # most recent token.
                        sentence_length = tf.shape(sentence)[0]
                        total_unique_words = unique_counter(sentence)
                        unique_words_before = unique_counter(
                            sentence[:sentence_length - 1])
                        return (tf.log(total_unique_words) -
                                tf.log(unique_words_before))

                    # A length-1 sentence has no prefix to compare against.
                    return repetition * tf.cond(math_ops.equal(
                        tf.shape(sentence)[0], 1),
                                                true_fn=first_time_penalty,
                                                false_fn=generic_penalty)

                # Use reshaped hypotheses; calculate penalty per beam per batch
                transposed_hypotheses = tf.transpose(hypotheses_so_far,
                                                     [2, 1, 0])
                repetition_penalty = tf.map_fn(lambda x: tf.map_fn(
                    wrapper_rep_penalty_function, x, dtype=tf.float32),
                                               transposed_hypotheses,
                                               dtype=tf.float32)
                repetition_penalty = tf.transpose(repetition_penalty, [1, 0])
                forward_scores += repetition_penalty

                # Add repetition penalty hypothesis scores
                decoder_state = BeamSearchDecoderState(
                    cell_state=decoder_state.cell_state,
                    log_probs=decoder_state.log_probs + repetition_penalty,
                    finished=decoder_state.finished,
                    lengths=decoder_state.lengths,
                    accumulated_attention_probs=decoder_state.
                    accumulated_attention_probs)

            if impute_finished:
                next_state = nest.map_structure(_maybe_copy_state,
                                                decoder_state, state)
            else:
                next_state = decoder_state

            # Beams that became finished at exactly this step.
            finished_this_beam = math_ops.logical_and(
                math_ops.logical_not(finished), decoder_finished)
            # Make sure number of outputs is never zero
            # On the last iteration, if nothing has been stored yet and
            # nothing finished now, treat every beam as finished.
            finished_this_beam = tf.cond(
                math_ops.logical_and(
                    math_ops.logical_and(
                        tf.equal(hypotheses.size(), 0),
                        tf.equal(tf.size(tf.where(finished_this_beam)), 0)),
                    tf.equal(time, maximum_iterations - 1)),
                true_fn=lambda: tf.cast(tf.ones_like(finished_this_beam),
                                        dtype=tf.bool),
                false_fn=lambda: finished_this_beam)

            def prepare_hypotheses_for_ta():
                # Indices of the newly finished beams; column 0 is used below
                # as the id stored alongside each hypothesis.
                finished_beams = tf.where(finished_this_beam)

                hypotheses_for_ta = tf.boolean_mask(
                    tf.transpose(hypotheses_so_far, [1, 2, 0]),
                    finished_this_beam)

                # Pad hypotheses with EOS token
                hypotheses_for_ta = tf.pad(hypotheses_for_ta,
                                           [[0, 0],
                                            [
                                                0, maximum_iterations +
                                                (not decoder._use_go_tokens) -
                                                tf.shape(hypotheses_for_ta)[-1]
                                            ]],
                                           constant_values=decoder._end_token)

                input_query_id = tf.expand_dims(finished_beams[:, 0], 1)
                scores_forward = tf.expand_dims(
                    tf.boolean_mask(forward_scores, finished_this_beam), 1)

                def inner_cond(index, base, hyp_ta, ind_ta, score_ta, hypos,
                               input_ids, forward_scores):
                    # Populate TA with given elements AND do not consider blank responses
                    # With go tokens this requires time > 0; without them the
                    # bound becomes time > -1.
                    return math_ops.logical_and(
                        math_ops.less(index,
                                      tf.shape(hypos)[0]),
                        math_ops.greater(time,
                                         0 - (not decoder._use_go_tokens)))

                def inner_body(index, base, hyp_ta, ind_ta, score_ta, hypos,
                               input_ids, forward_scores):
                    # `index` walks the finished hypotheses of this step;
                    # `base` is the next free TensorArray slot.
                    new_hyp_ta = nest.map_structure(
                        lambda ta, out: ta.write(base, out), hyp_ta,
                        hypos[index])
                    new_ind_ta = nest.map_structure(
                        lambda ta, out: ta.write(base, out), ind_ta,
                        input_ids[index])

                    # Remove repetition penalty from stored score, use as a feature for later re-reranking
                    # NOTE(review): the penalty added above is built from
                    # tf.log of the unique-token counts, whereas this
                    # subtracts repetition * unique count without a log, on
                    # the EOS-padded hypothesis -- confirm this is intended.
                    forward_scores_store = forward_scores[index]
                    if repetition != 0:
                        forward_scores_store -= repetition * \
                            unique_counter(hypos[index])

                    # Normalize finished scores by their length
                    # NOTE(review): the divisor counts tokens different from
                    # the end token; it is zero for a hypothesis made only of
                    # end tokens -- confirm that case cannot occur.
                    new_scores_ta = nest.map_structure(
                        lambda ta, out: ta.write(base, out), score_ta,
                        forward_scores_store / tf.cast(
                            tf.count_nonzero(hypos[index] -
                                             decoder._end_token), tf.float32))

                    return (index + 1, base + 1, new_hyp_ta, new_ind_ta,
                            new_scores_ta, hypos, input_ids, forward_scores)

                # Add multiple hypotheses (and related information) to TensorArray using a while_loop
                inner_result = tf.while_loop(
                    inner_cond,
                    inner_body,
                    loop_vars=(tf.constant(0), base_index, hypotheses,
                               input_ids, scores, hypotheses_for_ta,
                               input_query_id, scores_forward),
                    parallel_iterations=parallel_iterations,
                    swap_memory=swap_memory)
                # (new base index, hypotheses TA, input-id TA, scores TA)
                return inner_result[1], inner_result[2], inner_result[
                    3], inner_result[4]

            # In case finished is not True for any beams
            new_base, new_hypotheses, new_input_ids, new_scores = tf.cond(
                math_ops.greater(tf.count_nonzero(finished_this_beam), 0),
                true_fn=prepare_hypotheses_for_ta,
                false_fn=lambda: (base_index, hypotheses, input_ids, scores))

            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_finished, next_sequence_lengths, new_hypotheses,
                    new_input_ids, new_scores, new_base)
Exemplo n.º 50
0
 def __invert__(self):
   """Implements the `~` operator via `math_ops.logical_not(self)`."""
   # ops.Tensor used math_ops.logical_not as of August 2017.
   # Now that bitwise_ops.invert exists, it might make sense
   # for both ops.Tensor and TensorNode to use that if the
   # type is compatible.
   return math_ops.logical_not(self)
Exemplo n.º 51
0
 def condition(unused_time, elements_finished, *_):
   """Loop predicate: true while some element has not finished."""
   everything_done = math_ops.reduce_all(elements_finished)
   return math_ops.logical_not(everything_done)
Exemplo n.º 52
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Applies gradients, or runs the one-off mask-snipping step instead.

        When `global_step` equals 0 and `self.is_snipped` is still false, the
        pruning masks are (re)initialised from the gradient/weight saliency
        and `self.is_snipped` is set to True. In every other case the call is
        forwarded to the wrapped optimizer.

        Args:
          grads_and_vars: List of (gradient, variable) pairs as returned by
            `compute_gradients()`.
          global_step: Optional `Variable` to increment by one after the
            variables have been updated.
          name: Optional name for the returned operation.  Default to the name
            passed to the `Optimizer` constructor.

        Returns:
          The result of a `control_flow_ops.cond` that selects between the
          snipping op and the wrapped optimizer's `apply_gradients` op.
        """
        def _identity(tensor):
            return tensor

        # On TPU the gradients are summed across replicas before being used.
        reduce_fn = tpu_ops.cross_replica_sum if self._use_tpu else _identity

        # Map "<scope>" -> (reduced gradient, variable) for every variable
        # whose name ends in "weights:0".
        grad_var_by_scope = {}
        for grad, var in grads_and_vars:
            if not var.name.endswith('weights:0'):
                continue
            scope = re.findall('(.+)/weights:0', var.name)[0]
            grad_var_by_scope[scope] = (reduce_fn(grad), var)

        def snip_fn(mask, sparsity, dtype):
            """Builds a mask keeping the entries with the largest |grad * weight|.

            Args:
              mask: tf.Tensor, used to look up the matching gradient/variable
                pair and to provide the output shape and dtype.
              sparsity: float, between 0 and 1.
              dtype: tf.dtype, ignored.

            Returns:
              tf.Tensor with the shape of `mask`.
            """
            del dtype
            scope = sparse_utils.mask_extract_name_fn(mask.name)
            grad, weight = grad_var_by_scope[scope]
            saliency = math_ops.abs(grad * weight)
            total = np.prod(saliency.shape.as_list())
            kept = total - sparse_utils.get_n_zeros(total, sparsity)

            # Sort the entire array since the k needs to be constant for TPU.
            flat_saliency = array_ops.reshape(saliency, [-1])
            _, ranking = nn_ops.top_k(flat_saliency, k=total)
            scatter_index = array_ops.expand_dims(ranking, 1)
            # The first `kept` ranks receive a one, the remainder a zero.
            ranked_values = array_ops.where(
                math_ops.range(total) < kept,
                array_ops.ones_like(ranking, dtype=mask.dtype),
                array_ops.zeros_like(ranking, dtype=mask.dtype))
            flat_mask = array_ops.scatter_nd(scatter_index, ranked_values,
                                             ranked_values.shape)
            return array_ops.reshape(flat_mask, mask.shape)

        def snip_op():
            mask_assigner = sparse_utils.get_mask_init_fn(
                pruning.get_masks(),
                self._mask_init_method,
                self._default_sparsity,
                self._custom_sparsity_map,
                mask_fn=snip_fn)
            # Only flag the model as snipped once the masks were assigned.
            with ops.control_dependencies([mask_assigner]):
                return state_ops.assign(self.is_snipped,
                                        True,
                                        name='assign_true_after_snipped')

        def apply_gradient_op():
            return self._optimizer.apply_gradients(grads_and_vars,
                                                   global_step=global_step,
                                                   name=name)

        at_first_step = math_ops.equal(global_step, 0)
        not_yet_snipped = math_ops.logical_not(self.is_snipped)
        should_snip = math_ops.logical_and(at_first_step, not_yet_snipped)
        return control_flow_ops.cond(should_snip, snip_op, apply_gradient_op)
Exemplo n.º 53
0
 def condition(unused_time, elements_finished, *_):
     """Loop predicate: true while some element has not finished."""
     everything_done = math_ops.reduce_all(elements_finished)
     return math_ops.logical_not(everything_done)
Exemplo n.º 54
0
    def next_inputs(self, time, outputs, state, sample_ids, name=None):
        """Computes the next inputs, mixing in the model's own outputs.

        Starts from the parent helper's next inputs and, for entries whose
        `sample_ids` cast to True, substitutes the current `outputs`
        (optionally passed through `self._next_inputs_fn` and concatenated
        with the auxiliary inputs read at `time + 1`).

        Args:
          time: Current time step, forwarded to the parent helper.
          outputs: Outputs of the current step.
          state: State forwarded to, and returned from, the parent helper.
          sample_ids: Per-entry flags; cast to bool to decide where sampling
            happens.
          name: Optional name scope.

        Returns:
          A tuple `(finished, next_inputs, state)`.
        """
        with ops.name_scope(name, "ScheduledOutputTrainingHelperNextInputs",
                            [time, outputs, state, sample_ids]):
            parent = super(ScheduledOutputTrainingHelper, self)
            finished, base_next_inputs, state = parent.next_inputs(
                time=time,
                outputs=outputs,
                state=state,
                sample_ids=sample_ids,
                name=name)
            sample_ids = math_ops.cast(sample_ids, dtypes.bool)

            def _append_auxiliary(outputs_, indices=None):
                """Concatenate outputs with auxiliary inputs, if they exist."""
                aux_tas = self._auxiliary_input_tas
                if aux_tas is None:
                    return outputs_

                step = time + 1
                aux = nest.map_structure(lambda ta: ta.read(step), aux_tas)
                if indices is not None:
                    aux = array_ops.gather_nd(aux, indices)

                def _join(left, right):
                    return array_ops.concat((left, right), -1)

                return nest.map_structure(_join, outputs_, aux)

            def maybe_sample():
                """Perform scheduled sampling."""
                if self._next_inputs_fn is None:
                    # No transformation: pick elementwise between the (maybe
                    # augmented) outputs and the parent's inputs.
                    augmented = _append_auxiliary(outputs)
                    return array_ops.where(sample_ids, augmented,
                                           base_next_inputs)

                # Indices of the entries that are / are not sampled.
                picked = math_ops.cast(array_ops.where(sample_ids),
                                       dtypes.int32)
                skipped = math_ops.cast(
                    array_ops.where(math_ops.logical_not(sample_ids)),
                    dtypes.int32)
                picked_outputs = array_ops.gather_nd(outputs, picked)
                skipped_inputs = array_ops.gather_nd(base_next_inputs,
                                                     skipped)
                picked_inputs = _append_auxiliary(
                    self._next_inputs_fn(picked_outputs), picked)

                # Scatter both parts back into the full shape and combine.
                full_shape = array_ops.shape(base_next_inputs)
                from_samples = array_ops.scatter_nd(indices=picked,
                                                    updates=picked_inputs,
                                                    shape=full_shape)
                from_base = array_ops.scatter_nd(indices=skipped,
                                                 updates=skipped_inputs,
                                                 shape=full_shape)
                return from_samples + from_base

            all_finished = math_ops.reduce_all(finished)
            no_samples = math_ops.logical_not(math_ops.reduce_any(sample_ids))
            skip_sampling = math_ops.logical_or(all_finished, no_samples)
            next_inputs = control_flow_ops.cond(skip_sampling,
                                                lambda: base_next_inputs,
                                                maybe_sample)
            return (finished, next_inputs, state)
Exemplo n.º 55
0
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None):
  """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positive: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positive: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`, or NEG_INF (used when top_k is set). Values are converted to
      `float32` before being compared with the predictions, so integer
      thresholds such as `0` or `1` are accepted as well.
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).

  Returns:
    Update op, or `None` when `variables_to_update` is `None`.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
  if variables_to_update is None:
    return
  y_true = ops.convert_to_tensor(y_true)
  y_pred = ops.convert_to_tensor(y_pred)
  y_pred.shape.assert_is_compatible_with(y_true.shape)

  valid_keys = list(ConfusionMatrix)

  # Test membership, not the truthiness of the key itself.
  if not any(key in valid_keys for key in variables_to_update):
    raise ValueError(
        'Please provide at least one valid confusion matrix '
        'variable to update. Valid variable key options are: "{}". '
        'Received: "{}"'.format(valid_keys, variables_to_update.keys()))

  invalid_keys = [key for key in variables_to_update if key not in valid_keys]
  if invalid_keys:
    raise ValueError(
        'Invalid keys: {}. Valid variable key options are: "{}"'.format(
            invalid_keys, valid_keys))

  # The range checks run before the predictions are consumed below.
  with ops.control_dependencies([
      check_ops.assert_greater_equal(
          y_pred,
          math_ops.cast(0.0, dtype=y_pred.dtype),
          message='predictions must be >= 0'),
      check_ops.assert_less_equal(
          y_pred,
          math_ops.cast(1.0, dtype=y_pred.dtype),
          message='predictions must be <= 1')
  ]):
    y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
        math_ops.cast(y_pred, dtype=dtypes.float32),
        math_ops.cast(y_true, dtype=dtypes.bool), sample_weight)

  if top_k is not None:
    y_pred = _filter_top_k(y_pred, top_k)
  if class_id is not None:
    y_true = y_true[..., class_id]
    y_pred = y_pred[..., class_id]

  thresholds = to_list(thresholds)
  num_thresholds = len(thresholds)
  num_predictions = array_ops.size(y_pred)

  # Reshape predictions and labels.
  predictions_2d = array_ops.reshape(y_pred, [1, -1])
  labels_2d = array_ops.reshape(
      math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

  # Tile the thresholds for every prediction. The dtype is pinned to float32
  # so it always matches the float32 predictions in the comparison below;
  # without it, integer thresholds would produce an int32 tensor.
  thresh_tiled = array_ops.tile(
      array_ops.expand_dims(
          array_ops.constant(thresholds, dtype=dtypes.float32), 1),
      array_ops.stack([1, num_predictions]))

  # Tile the predictions for every threshold.
  preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1])

  # Compare predictions and threshold.
  pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

  # Tile labels by number of thresholds
  label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])

  if sample_weight is not None:
    weights = weights_broadcast_ops.broadcast_weights(
        math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
    weights_tiled = array_ops.tile(
        array_ops.reshape(weights, [1, -1]), [num_thresholds, 1])
  else:
    weights_tiled = None

  update_ops = []

  def weighted_assign_add(label, pred, weights, var):
    # Adds, per threshold row, the (weighted) count of positions where both
    # `label` and `pred` hold.
    label_and_pred = math_ops.cast(
        math_ops.logical_and(label, pred), dtype=dtypes.float32)
    if weights is not None:
      label_and_pred *= weights
    return state_ops.assign_add(var, math_ops.reduce_sum(label_and_pred, 1))

  # Maps each confusion-matrix cell to the (label, prediction) masks whose
  # conjunction defines it. Negated masks are only built when needed.
  loop_vars = {
      ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
  }
  update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
  update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
  update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

  if update_fn or update_tn:
    pred_is_neg = math_ops.logical_not(pred_is_pos)
    loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg)

  if update_fp or update_tn:
    label_is_neg = math_ops.logical_not(label_is_pos)
    loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos)
    if update_tn:
      loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg)

  # Only cells the caller supplied a variable for are updated.
  for matrix_cond, (label, pred) in loop_vars.items():
    if matrix_cond in variables_to_update:
      update_ops.append(
          weighted_assign_add(label, pred, weights_tiled,
                              variables_to_update[matrix_cond]))
  return control_flow_ops.group(update_ops)
Exemplo n.º 56
0
 def condition(unused_time, unused_outputs_ta, unused_state,
               unused_inputs, finished, unused_sequence_lengths):
     """Loop predicate: true while some entry of `finished` is still false."""
     everything_done = math_ops.reduce_all(finished)
     return math_ops.logical_not(everything_done)
def _logical_not(x):
  """Negates `x`, folding to a constant when its value is statically known.

  If `_static_value(x)` yields a value, the negation is computed with NumPy
  and wrapped in a constant; otherwise a `logical_not` op is applied to `x`.
  """
  static_x = _static_value(x)
  if static_x is not None:
    return constant_op.constant(np.logical_not(static_x))
  return math_ops.logical_not(x)
Exemplo n.º 58
0
        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """Internal while_loop body: runs one decoder step and advances the loop.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of finish).

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths)`.
            """
            (next_outputs, decoder_state, next_inputs,
             decoder_finished) = decoder.step(time, inputs, state)
            # A decoder that tracks its own finished flags supplies the full
            # mask; otherwise entries that were already finished stay finished.
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
            else:
                next_finished = math_ops.logical_or(decoder_finished, finished)
            # Mark everything as finished once the next step would reach the
            # iteration cap.
            if maximum_iterations is not None:
                next_finished = math_ops.logical_or(
                    next_finished, time + 1 >= maximum_iterations)
            # Entries that finish on this step (not finished before, finished
            # now) get length `time + 1`; all others keep their current value.
            next_sequence_lengths = array_ops.where(
                math_ops.logical_and(math_ops.logical_not(finished),
                                     next_finished),
                array_ops.fill(array_ops.shape(sequence_lengths), time + 1),
                sequence_lengths)

            # The decoder's results must have the same nesting as the loop
            # variables they replace.
            nest.assert_same_structure(state, decoder_state)
            nest.assert_same_structure(outputs_ta, next_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            # Zero out output values past finish. The mask is the incoming
            # `finished`, so the output of the step on which an entry finishes
            # is still emitted.
            if impute_finished:
                emit = nest.map_structure(
                    lambda out, zero: array_ops.where(finished, zero, out),
                    next_outputs, zero_outputs)
            else:
                emit = next_outputs

            # Copy through states past finish
            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states get passed through.
                if isinstance(cur, tensor_array_ops.TensorArray):
                    pass_through = True
                else:
                    # Give `new` the shape of `cur` before checking its rank.
                    new.set_shape(cur.shape)
                    pass_through = (new.shape.ndims == 0)
                # Otherwise keep `cur` where already finished, `new` elsewhere.
                return new if pass_through else array_ops.where(
                    finished, cur, new)

            if impute_finished:
                next_state = nest.map_structure(_maybe_copy_state,
                                                decoder_state, state)
            else:
                next_state = decoder_state

            # Write this step's (possibly zeroed) outputs at index `time`.
            outputs_ta = nest.map_structure(
                lambda ta, out: ta.write(time, out), outputs_ta, emit)
            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_finished, next_sequence_lengths)
Exemplo n.º 59
0
def triplet_semihard_loss(labels, embeddings, margin=1.0):
  """Triplet loss with semi-hard negative mining inside the mini-batch.

  For each positive pair (two embeddings with equal labels) the loss term is
  `max(margin + D_ap - D_an, 0)`, where `D_ap` is the squared distance of the
  pair and `D_an` is the smallest negative distance that is greater than
  `D_ap` (the semi-hard negative). If no such negative exists, the largest
  negative distance is used instead. The terms are summed and divided by the
  number of positive pairs, the diagonal excluded.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """
  # Turn the [batch_size] labels into a [batch_size, 1] column.
  label_shape = array_ops.shape(labels)
  assert label_shape.shape == 1
  label_column = array_ops.reshape(labels, [label_shape[0], 1])

  # Squared distance between every pair of embeddings.
  sq_dist = pairwise_distance(embeddings, squared=True)
  # True where two examples carry the same label.
  is_positive = math_ops.equal(
      label_column, array_ops.transpose(label_column))
  # The complement selects the negatives.
  is_negative = math_ops.logical_not(is_positive)

  batch_size = array_ops.size(label_column)
  stack_rows = [batch_size, 1]
  square = [batch_size, batch_size]

  # Stack batch_size copies of both matrices and keep the negatives whose
  # distance exceeds the distance they are compared against.
  sq_dist_stacked = array_ops.tile(sq_dist, stack_rows)
  is_negative_stacked = array_ops.tile(is_negative, stack_rows)
  reference_dist = array_ops.reshape(array_ops.transpose(sq_dist), [-1, 1])
  farther_negative = math_ops.logical_and(
      is_negative_stacked,
      math_ops.greater(sq_dist_stacked, reference_dist))

  # True where at least one such farther negative was found.
  farther_count = math_ops.reduce_sum(
      math_ops.cast(farther_negative, dtype=dtypes.float32),
      1,
      keep_dims=True)
  has_semi_hard = array_ops.reshape(
      math_ops.greater(farther_count, 0.0), square)
  has_semi_hard = array_ops.transpose(has_semi_hard)

  is_negative_f = math_ops.cast(is_negative, dtype=dtypes.float32)
  farther_negative_f = math_ops.cast(farther_negative, dtype=dtypes.float32)

  # Outside candidates: smallest D_an where D_an > D_ap.
  outside = array_ops.reshape(
      masked_minimum(sq_dist_stacked, farther_negative_f), square)
  outside = array_ops.transpose(outside)

  # Inside candidates: largest D_an.
  inside = array_ops.tile(
      masked_maximum(sq_dist, is_negative_f), [1, batch_size])
  chosen_negative = array_ops.where(has_semi_hard, outside, inside)

  hinge_input = math_ops.add(margin, sq_dist - chosen_negative)

  # Positive pairs with the diagonal (self-pairs) removed.
  positive_pairs = math_ops.cast(is_positive, dtype=dtypes.float32)
  positive_pairs = positive_pairs - array_ops.diag(
      array_ops.ones([batch_size]))

  # In lifted-struct, the authors multiply 0.5 for upper triangular
  #   in semihard, they take all positive pairs except the diagonal.
  num_positives = math_ops.reduce_sum(positive_pairs)

  hinge = math_ops.maximum(
      math_ops.multiply(hinge_input, positive_pairs), 0.0)
  return math_ops.truediv(
      math_ops.reduce_sum(hinge),
      num_positives,
      name='triplet_semihard_loss')
Exemplo n.º 60
0
def triplet_semihard_loss(y_true, y_pred, margin=1.0):
    """Triplet loss with semi-hard negative mining inside the mini-batch.

    For each positive pair (two embeddings with equal labels) the loss term
    is `max(margin + D_ap - D_an, 0)`, where `D_ap` is the squared distance
    of the pair and `D_an` is the smallest negative distance greater than
    `D_ap`, or the largest negative distance when no such negative exists.
    The terms are summed and divided by the number of positive pairs, the
    diagonal excluded.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.

    Returns:
      The scalar loss tensor.
    """
    # Turn the [batch_size] labels into a [batch_size, 1] column.
    label_shape = array_ops.shape(y_true)
    assert label_shape.shape == 1
    label_column = array_ops.reshape(y_true, [label_shape[0], 1])

    # Squared distance between every pair of embeddings.
    sq_dist = metric_learning.pairwise_distance(y_pred, squared=True)
    # True where two examples carry the same label.
    is_positive = math_ops.equal(
        label_column, array_ops.transpose(label_column))
    # The complement selects the negatives.
    is_negative = math_ops.logical_not(is_positive)

    batch_size = array_ops.size(label_column)
    stack_rows = [batch_size, 1]
    square = [batch_size, batch_size]

    # Stack batch_size copies of both matrices and keep the negatives whose
    # distance exceeds the distance they are compared against.
    sq_dist_stacked = array_ops.tile(sq_dist, stack_rows)
    is_negative_stacked = array_ops.tile(is_negative, stack_rows)
    reference_dist = array_ops.reshape(
        array_ops.transpose(sq_dist), [-1, 1])
    farther_negative = math_ops.logical_and(
        is_negative_stacked,
        math_ops.greater(sq_dist_stacked, reference_dist))

    # True where at least one such farther negative was found.
    farther_count = math_ops.reduce_sum(
        math_ops.cast(farther_negative, dtype=dtypes.float32),
        1,
        keepdims=True)
    has_semi_hard = array_ops.reshape(
        math_ops.greater(farther_count, 0.0), square)
    has_semi_hard = array_ops.transpose(has_semi_hard)

    is_negative_f = math_ops.cast(is_negative, dtype=dtypes.float32)
    farther_negative_f = math_ops.cast(
        farther_negative, dtype=dtypes.float32)

    # Outside candidates: smallest D_an where D_an > D_ap.
    outside = array_ops.reshape(
        _masked_minimum(sq_dist_stacked, farther_negative_f), square)
    outside = array_ops.transpose(outside)

    # Inside candidates: largest D_an.
    inside = array_ops.tile(
        _masked_maximum(sq_dist, is_negative_f), [1, batch_size])
    chosen_negative = array_ops.where(has_semi_hard, outside, inside)

    hinge_input = math_ops.add(margin, sq_dist - chosen_negative)

    # Positive pairs with the diagonal (self-pairs) removed.
    positive_pairs = math_ops.cast(is_positive, dtype=dtypes.float32)
    positive_pairs = positive_pairs - array_ops.diag(
        array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(positive_pairs)

    hinge = math_ops.maximum(
        math_ops.multiply(hinge_input, positive_pairs), 0.0)
    return math_ops.truediv(math_ops.reduce_sum(hinge), num_positives)