Example #1
  def testConstraints(self):
    # Conv1D
    k_constraint = lambda x: x / math_ops.reduce_sum(x)
    b_constraint = lambda x: x / math_ops.reduce_max(x)
    conv1d = conv_layers.Conv1D(2, 3,
                                kernel_constraint=k_constraint,
                                bias_constraint=b_constraint)
    inputs = random_ops.random_uniform((5, 3, 5), seed=1)
    conv1d(inputs)
    self.assertEqual(conv1d.kernel_constraint, k_constraint)
    self.assertEqual(conv1d.bias_constraint, b_constraint)

    # Conv2D
    k_constraint = lambda x: x / math_ops.reduce_sum(x)
    b_constraint = lambda x: x / math_ops.reduce_max(x)
    conv2d = conv_layers.Conv2D(2, 3,
                                kernel_constraint=k_constraint,
                                bias_constraint=b_constraint)
    inputs = random_ops.random_uniform((5, 3, 3, 5), seed=1)
    conv2d(inputs)
    self.assertEqual(conv2d.kernel_constraint, k_constraint)
    self.assertEqual(conv2d.bias_constraint, b_constraint)

    # Conv3D
    k_constraint = lambda x: x / math_ops.reduce_sum(x)
    b_constraint = lambda x: x / math_ops.reduce_max(x)
    conv3d = conv_layers.Conv3D(2, 3,
                                kernel_constraint=k_constraint,
                                bias_constraint=b_constraint)
    inputs = random_ops.random_uniform((5, 3, 3, 3, 5), seed=1)
    conv3d(inputs)
    self.assertEqual(conv3d.kernel_constraint, k_constraint)
    self.assertEqual(conv3d.bias_constraint, b_constraint)
Example #2
def confusion_matrix(predictions, labels, num_classes=None,
                     dtype=dtypes.int32, name=None):
  """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  Considering a prediction array such as `[1, 2, 3]`
  and a label array such as `[2, 2, 3]`,

  the confusion matrix returned would be the following one:

      [[0, 0, 0, 0]
       [0, 0, 1, 0]
       [0, 0, 1, 0]
       [0, 0, 0, 1]]

  Here the matrix rows represent the prediction labels and the columns
  represent the real labels. The confusion matrix is always a 2-D array
  of shape [n, n], where n is the number of valid labels for a given
  classification task. Both prediction and labels must be 1-D arrays of
  the same shape in order for this function to work.

  Args:
    predictions: A 1-D array representing the predictions for a given
                 classification.
    labels: A 1-D array representing the real labels for the classification
            task.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both predictions and labels array.
    dtype: Data type of the confusion matrix.
    name: Scope name.

  Returns:
    A k x k matrix representing the confusion matrix, where k is the number of
    possible labels in the classification task.

  Raises:
    ValueError: If predictions and labels are not 1-D vectors or do not
                have the same size.
  """
  with ops.name_scope(name, 'confusion_matrix',
                      [predictions, labels, num_classes]) as name:
    predictions, labels = metric_ops_util.remove_squeezable_dimensions(
        ops.convert_to_tensor(
            predictions, name='predictions', dtype=dtypes.int64),
        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))

    if num_classes is None:
      num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                     math_ops.reduce_max(labels)) + 1

    shape = array_ops.pack([num_classes, num_classes])
    indices = array_ops.transpose(array_ops.pack([predictions, labels]))
    values = array_ops.ones_like(predictions, dtype)
    cm_sparse = ops.SparseTensor(
        indices=indices, values=values, shape=shape)
    zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

    return sparse_ops.sparse_add(zero_matrix, cm_sparse)
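A quick way to sanity-check the docstring example is to count (prediction, label) pairs directly. The sketch below is illustrative NumPy, not part of the library:

import numpy as np

def confusion_matrix_np(predictions, labels, num_classes=None):
  # Count how often each (prediction, label) pair occurs.
  if num_classes is None:
    num_classes = max(predictions.max(), labels.max()) + 1
  cm = np.zeros((num_classes, num_classes), dtype=np.int32)
  for p, l in zip(predictions, labels):
    cm[p, l] += 1
  return cm

print(confusion_matrix_np(np.array([1, 2, 3]), np.array([2, 2, 3])))
# [[0 0 0 0]
#  [0 0 1 0]
#  [0 0 1 0]
#  [0 0 0 1]]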
Example #3
def softmax(x, axis=-1):
  """The softmax activation function transforms the outputs so that all values are in

  range (0, 1) and sum to 1. It is often used as the activation for the last
  layer of a classification network because the result could be interpreted as
  a probability distribution. The softmax of x is calculated by
  exp(x)/tf.reduce_sum(exp(x)).

  Arguments:
      x: Input tensor.
      axis: Integer, axis along which the softmax normalization is applied.

  Returns:
      Tensor, output of softmax transformation (all values are non-negative
        and sum to 1).

  Raises:
      ValueError: In case `dim(x) == 1`.
  """
  ndim = K.ndim(x)
  if ndim == 2:
    return nn.softmax(x)
  elif ndim > 2:
    e = math_ops.exp(x - math_ops.reduce_max(x, axis=axis, keepdims=True))
    s = math_ops.reduce_sum(e, axis=axis, keepdims=True)
    return e / s
  else:
    raise ValueError('Cannot apply softmax to a tensor that is 1D. '
                     'Received input: %s' % (x,))
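The max-subtraction in the ndim > 2 branch is the standard numerical-stability trick: shifting the inputs by their maximum leaves the softmax unchanged but keeps exp() from overflowing. A minimal NumPy illustration (not library code):

import numpy as np

x = np.array([1000.0, 1001.0, 1002.0])  # np.exp(x) alone would overflow
e = np.exp(x - x.max())                 # shift by the max first
print(e / e.sum())                      # [0.09003057 0.24472847 0.66524096]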
Example #4
  def finalize(self, outputs, final_state, sequence_lengths):
    """Finalize and return the predicted_ids.

    Args:
      outputs: An instance of BeamSearchDecoderOutput.
      final_state: An instance of BeamSearchDecoderState. Passed through to the
        output.
      sequence_lengths: An `int64` tensor shaped `[batch_size, beam_width]`.
        The sequence lengths determined for each beam during decode.
        **NOTE** These are ignored; the updated sequence lengths are stored in
        `final_state.lengths`.

    Returns:
      outputs: An instance of `FinalBeamSearchDecoderOutput` where the
        predicted_ids are the result of calling _gather_tree.
      final_state: The same input instance of `BeamSearchDecoderState`.
    """
    del sequence_lengths
    # Get max_sequence_length across all beams for each batch.
    max_sequence_lengths = math_ops.to_int32(
        math_ops.reduce_max(final_state.lengths, axis=1))
    predicted_ids = beam_search_ops.gather_tree(
        outputs.predicted_ids,
        outputs.parent_ids,
        max_sequence_lengths=max_sequence_lengths,
        end_token=self._end_token)
    outputs = FinalBeamSearchDecoderOutput(
        beam_search_decoder_output=outputs, predicted_ids=predicted_ids)
    return outputs, final_state
Example #5
 def grow_tree_from_stats_summaries(stats_summary_list):
   """Updates ensemble based on the best gains from stats summaries."""
   (node_ids_per_feature, gains_list, thresholds_list,
    left_node_contribs_list, right_node_contribs_list) = (
        boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=array_ops.stack([
                math_ops.reduce_min(node_ids),
                math_ops.reduce_max(node_ids)
            ]),
            stats_summary_list=stats_summary_list,
            l1=tree_hparams.l1,
            l2=tree_hparams.l2,
            tree_complexity=tree_hparams.tree_complexity,
            max_splits=max_splits))
   grow_op = boosted_trees_ops.update_ensemble(
       # Confirm if local_tree_ensemble or tree_ensemble should be used.
       tree_ensemble.resource_handle,
       feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
       node_ids=node_ids_per_feature,
       gains=gains_list,
       thresholds=thresholds_list,
       left_node_contribs=left_node_contribs_list,
       right_node_contribs=right_node_contribs_list,
       learning_rate=tree_hparams.learning_rate,
       max_depth=tree_hparams.max_depth,
       pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
   return grow_op
Example #6
  def testDictionary(self):
    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.float32)
        pi = array_ops.placeholder(dtypes.int64)
        gi = array_ops.placeholder(dtypes.int64)
        v = 2. * (array_ops.zeros([128, 128]) + x)
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.float32, dtypes.float32],
            shapes=[[], [128, 128]],
            names=['x', 'v'])
        stage = stager.put(pi, {'x': x, 'v': v})
        key, ret = stager.get(gi)
        z = ret['x']
        y = ret['v']
        y = math_ops.reduce_max(z * math_ops.matmul(y, y))

    G.finalize()

    with self.session(use_gpu=True, graph=G) as sess:
      sess.run(stage, feed_dict={x: -1, pi: 0})
      for i in range(10):
        _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
        self.assertAllClose(
            4 * (i - 1) * (i - 1) * (i - 1) * 128, yval, rtol=1e-4)
Example #7
def functional_rnn(cell, inputs, sequence_length=None,
                   initial_state=None, dtype=None, time_major=False,
                   scope=None, use_tpu=False):
  """Same interface as `tf.nn.dynamic_rnn`."""
  with variable_scope.variable_scope(scope or 'rnn'):
    if not time_major:
      inputs = nest.map_structure(
          lambda t: array_ops.transpose(t, [1, 0, 2]), inputs)
    inputs_flat = nest.flatten(inputs)
    batch_size = array_ops.shape(inputs_flat[0])[1]
    if initial_state is None:
      initial_state = cell.zero_state(batch_size, dtype)
    func_cell = _FunctionalRnnCell(cell, inputs, initial_state)
  if sequence_length is not None:
    max_length = math_ops.reduce_max(sequence_length)
  else:
    max_length = None
  extended_acc_state, extended_final_state = recurrent.Recurrent(
      theta=func_cell.theta,
      state0=func_cell.extended_initial_state,
      inputs=inputs,
      cell_fn=func_cell.cell_step,
      max_input_length=max_length,
      use_tpu=use_tpu)
  tf_output, tf_state = _PostProcessOutput(
      extended_acc_state, extended_final_state, func_cell,
      inputs_flat[0].shape[0], sequence_length)

  if time_major:
    tf_output = array_ops.transpose(tf_output, [1, 0, 2])
  return tf_output, tf_state
Example #8
def dense_labels_to_sparse(dense, length):
  """Convert dense labels with sequence lengths to sparse tensor.

  Args:
    dense: tensor of shape [batch, max_length]
    length: int tensor of shape [batch]
      The length of each sequence in dense.

  Returns:
    tf.SparseTensor with values only for the valid elements of sequences.
  """

  flat_values = array_ops.reshape(dense, [-1])
  flat_indices = math_ops.range(
      array_ops.shape(flat_values, out_type=dtypes.int64)[0])
  mask = array_ops.sequence_mask(length, maxlen=array_ops.shape(dense)[1])
  flat_mask = array_ops.reshape(mask, [-1])
  indices = array_ops.expand_dims(
      array_ops.boolean_mask(flat_indices, flat_mask), 1)
  values = array_ops.boolean_mask(flat_values, flat_mask)
  sparse = sparse_tensor.SparseTensor(
      indices=indices, values=math_ops.cast(values, dtypes.int32),
      dense_shape=array_ops.shape(flat_values, out_type=dtypes.int64))
  reshaped = sparse_ops.sparse_reshape(sparse, array_ops.shape(dense))
  max_length = math_ops.reduce_max(length)
  return sparse_tensor.SparseTensor(
      indices=reshaped.indices,
      values=reshaped.values,
      dense_shape=[
          math_ops.cast(reshaped.dense_shape[0], dtypes.int64),
          math_ops.cast(max_length, dtypes.int64)])
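For intuition: only the first length[b] entries of each row of dense survive into the sparse tensor. A plain-Python sketch of which coordinates are kept (illustrative only):

dense = [[3, 1, 0],
         [2, 2, 2]]
length = [2, 3]
kept = [((b, t), dense[b][t])
        for b in range(len(dense)) for t in range(length[b])]
print(kept)  # [((0, 0), 3), ((0, 1), 1), ((1, 0), 2), ((1, 1), 2), ((1, 2), 2)]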
Example #9
  def __call__(self, inputs, state, scope=None):
    """Build the CrfDecodeForwardRnnCell.

    Args:
      inputs: A [batch_size, num_tags] matrix of unary potentials.
      state: A [batch_size, num_tags] matrix containing the previous step's
            score values.
      scope: Unused variable scope of this cell.

    Returns:
      backpointers: A [batch_size, num_tags] matrix of backpointers.
      new_state: A [batch_size, num_tags] matrix of new score values.
    """
    # For simplicity, in shape comments, denote:
    # 'batch_size' by 'B', 'max_seq_len' by 'T', 'num_tags' by 'O' (output).
    state = array_ops.expand_dims(state, 2)                         # [B, O, 1]

    # This addition op broadcasts self._transition_params along the zeroth
    # dimension and state along the second dimension.
    # [B, O, 1] + [1, O, O] -> [B, O, O]
    transition_scores = state + self._transition_params             # [B, O, O]
    new_state = inputs + math_ops.reduce_max(transition_scores, [1])  # [B, O]
    backpointers = math_ops.argmax(transition_scores, 1)
    backpointers = math_ops.cast(backpointers, dtype=dtypes.int32)    # [B, O]
    return backpointers, new_state
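One step of this recurrence is easy to reproduce in NumPy. The sketch below uses made-up values and mirrors the broadcasting described in the shape comments:

import numpy as np

num_tags = 3
state = np.array([[1.0, 2.0, 0.5]])          # [B=1, O]
transition_params = np.eye(num_tags)         # [O, O], made-up transitions
unary = np.zeros((1, num_tags))              # the cell's inputs at this step

transition_scores = state[:, :, None] + transition_params  # [B, O, O]
new_state = unary + transition_scores.max(axis=1)          # [B, O]
backpointers = transition_scores.argmax(axis=1)            # [B, O]
print(new_state, backpointers)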
Example #10
def collapse_repeated(labels, seq_length, name=None):
  """Merge repeated labels into single labels.

  Args:
    labels: Tensor of shape [batch, max value in seq_length]
    seq_length: Tensor of shape [batch], sequence length of each batch element.
    name: A name for this `Op`. Defaults to "collapse_repeated_labels".

  Returns:
    A tuple `(collapsed_labels, new_seq_length)` where

    collapsed_labels: Tensor of shape [batch, max_seq_length] with repeated
    labels collapsed and padded to max_seq_length, e.g.:
    `[[A, A, B, B, A], [A, B, C, D, E]] => [[A, B, A, 0, 0], [A, B, C, D, E]]`

    new_seq_length: int tensor of shape [batch] with new sequence lengths.
  """

  with ops.name_scope(name, "collapse_repeated_labels", [labels, seq_length]):
    labels = ops.convert_to_tensor(labels, name="labels")
    seq_length = ops.convert_to_tensor(seq_length, name="seq_length")

    # Mask labels that don't equal previous label.
    label_mask = array_ops.concat(
        [array_ops.ones_like(labels[:, :1], dtypes.bool),
         math_ops.not_equal(labels[:, 1:], labels[:, :-1])],
        axis=1)

    # Filter labels that aren't in the original sequence.
    maxlen = _get_dim(labels, 1)
    seq_mask = array_ops.sequence_mask(seq_length, maxlen=maxlen)
    label_mask = math_ops.logical_and(label_mask, seq_mask)

    # Count masks for new sequence lengths.
    new_seq_len = math_ops.reduce_sum(
        math_ops.cast(label_mask, dtypes.int32), axis=1)

    # Mask indexes based on sequence length mask.
    new_maxlen = math_ops.reduce_max(new_seq_len)
    idx_mask = array_ops.sequence_mask(new_seq_len, maxlen=new_maxlen)

    # Flatten everything, then use the masks to select the labels to keep
    # and the indices to scatter them to.
    flat_labels = array_ops.reshape(labels, [-1])
    flat_label_mask = array_ops.reshape(label_mask, [-1])
    flat_idx_mask = array_ops.reshape(idx_mask, [-1])
    idx = math_ops.range(_get_dim(flat_idx_mask, 0))

    # Scatter to flat shape.
    flat = array_ops.scatter_nd(
        indices=array_ops.expand_dims(
            array_ops.boolean_mask(idx, flat_idx_mask), axis=1),
        updates=array_ops.boolean_mask(flat_labels, flat_label_mask),
        shape=array_ops.shape(flat_idx_mask))

    # Reshape back to square batch.
    batch_size = _get_dim(labels, 0)
    new_shape = [batch_size, new_maxlen]
    return (array_ops.reshape(flat, new_shape),
            math_ops.cast(new_seq_len, seq_length.dtype))
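The collapsing behaviour from the docstring example can be checked with plain Python, keeping each label only when it differs from its predecessor:

labels = [["A", "A", "B", "B", "A"], ["A", "B", "C", "D", "E"]]
collapsed = [[row[0]] + [c for prev, c in zip(row, row[1:]) if c != prev]
             for row in labels]
print(collapsed)  # [['A', 'B', 'A'], ['A', 'B', 'C', 'D', 'E']]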
Example #11
  def _call_cell(self,
                 inputs,
                 initial_cell_state=None,
                 initial_output=None,
                 dtype=None,
                 sequence_length=None):
    """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      initial_cell_state: initial value for cell state, shape `[batch_size,
        self._num_units]`
      initial_output: initial value of cell output, shape `[batch_size,
        self._num_units]`
      dtype: The data type for the initial state and expected output.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len)` or None.

    Returns:
      A pair containing:

      - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
                         output_size]`
      - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
                    output_size]`
    """

    inputs_shape = inputs.get_shape().with_rank(3)
    time_len = inputs_shape.dims[0].value
    if time_len is None:
      time_len = array_ops.shape(inputs)[0]

    if self._use_peephole:
      wci = self._w_i_diag
      wco = self._w_o_diag
      wcf = self._w_f_diag
    else:
      wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)

    if sequence_length is None:
      max_seq_len = math_ops.cast(time_len, dtypes.int64)
    else:
      max_seq_len = math_ops.cast(math_ops.reduce_max(sequence_length),
                                  dtypes.int64)

    _, cs, _, _, _, _, h = gen_lstm_ops.block_lstm(
        seq_len_max=max_seq_len,
        x=inputs,
        cs_prev=initial_cell_state,
        h_prev=initial_output,
        w=self._kernel,
        wci=wci,
        wcf=wcf,
        wco=wco,
        b=self._bias,
        forget_bias=self._forget_bias,
        cell_clip=self._cell_clip,
        use_peephole=self._use_peephole)
    return cs, h
Example #12
  def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
                 sequence_length):
    """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len x batch_size x input_size]`
      initial_cell_state: initial value for cell state, shape `[batch_size,
        self._num_units]`
      initial_output: initial value of cell output, shape `[batch_size,
        self._num_units]`
      dtype: The data type for the initial state and expected output.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) of size `[batch_size]`, values in
        `[0, time_len)` or None.

    Returns:
      A pair containing:
      - Cell state (cs): A `3-D` tensor of shape `[time_len x batch_size x
                         output_size]`
      - Output (h): A `3-D` tensor of shape `[time_len x batch_size x
                    output_size]`
    """

    inputs_shape = inputs.get_shape().with_rank(3)
    time_len = inputs_shape[0].value
    if time_len is None:
      time_len = array_ops.shape(inputs)[0]
    input_size = inputs_shape[2].value
    w = vs.get_variable(
        "W_0", [input_size + self._num_units, self._num_units * 4], dtype=dtype)
    b = vs.get_variable(
        "B", [w.get_shape().with_rank(2)[1]],
        initializer=init_ops.constant_initializer(0.0),
        dtype=dtype)
    if self._use_peephole:
      wci = vs.get_variable("W_I_diag", [self._num_units], dtype=dtype)
      wco = vs.get_variable("W_O_diag", [self._num_units], dtype=dtype)
      wcf = vs.get_variable("W_F_diag", [self._num_units], dtype=dtype)
    else:
      wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype)

    if sequence_length is None:
      max_seq_len = time_len
    else:
      max_seq_len = math_ops.to_int64(math_ops.reduce_max(sequence_length))

    _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm(
        seq_len_max=max_seq_len,
        x=inputs,
        cs_prev=initial_cell_state,
        h_prev=initial_output,
        w=w,
        wci=wci,
        wco=wco,
        wcf=wcf,
        b=b,
        forget_bias=self._forget_bias,
        cell_clip=self._cell_clip,
        use_peephole=self._use_peephole)
    return cs, h
Example #13
def gather_tree_from_array(t, parent_ids, sequence_length):
  """Calculates the full beams for `TensorArray`s.

  Args:
    t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of
      shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]`
      where `s` is the depth shape.
    parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`.
    sequence_length: The sequence length of shape `[batch_size, beam_width]`.

  Returns:
    A `Tensor` which is a stacked `TensorArray` of the same size and type as
    `t` and where beams are sorted in each `Tensor` according to `parent_ids`.
  """
  max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0]
  batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1]
  beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2]

  # Generate beam ids that will be reordered by gather_tree.
  beam_ids = array_ops.expand_dims(
      array_ops.expand_dims(math_ops.range(beam_width), 0), 0)
  beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1])

  mask = array_ops.sequence_mask(
      sequence_length, maxlen=max_time, dtype=dtypes.int32)
  mask = array_ops.transpose(mask, perm=[2, 0, 1])

  # Use beam_width + 1 to mark the end of beam.
  masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1)

  max_sequence_lengths = math_ops.to_int32(
      math_ops.reduce_max(sequence_length, axis=1))
  sorted_beam_ids = beam_search_ops.gather_tree(
      step_ids=masked_beam_ids,
      parent_ids=parent_ids,
      max_sequence_lengths=max_sequence_lengths,
      end_token=beam_width + 1)

  # For out of range steps, simply copy the same beam.
  sorted_beam_ids = array_ops.where(
      math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids)

  # Generate indices for gather_nd.
  time_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width])
  batch_ind = array_ops.tile(array_ops.reshape(
      math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width])
  batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2])
  indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1)

  # Gather from a tensor with collapsed additional dimensions.
  gather_from = t
  final_shape = array_ops.shape(gather_from)
  gather_from = array_ops.reshape(
      gather_from, [max_time, batch_size, beam_width, -1])
  ordered = array_ops.gather_nd(gather_from, indices)
  ordered = array_ops.reshape(ordered, final_shape)

  return ordered
Example #14
 def _show_max_abs(tensor):
   tensor = math_ops.cast(tensor, dtypes.float32)
   output_tensor = math_ops.reduce_max(math_ops.abs(tensor))
   zero = constant_op.constant(0, dtypes.float32)
   output_tensor = gen_math_ops.maximum(zero, output_tensor)
   # The output shape has to be [1]; reshape in case the tensor lost its
   # static shape information.
   output_tensor = array_ops.reshape(output_tensor, [1])
   return output_tensor
Example #15
 def testGradient4(self):
   s = [2, 3, 4, 2]
   x = np.arange(1.0, 49.0).reshape(s).astype(np.float64)
   with self.test_session():
     t = ops.convert_to_tensor(x)
     su = math_ops.reduce_max(t)
     jacob_t, jacob_n = gradient_checker.compute_gradient(
         t, s, su, [1], x_init_value=x, delta=1)
   self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
Example #16
 def compute_best_f1_score(tp, fp, fn, name):
   precision_at_t = math_ops.div(tp, epsilon + tp + fp,
                                 name='precision_' + name)
   recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
   # Compute F1 score.
   f1_at_thresholds = (
       2.0 * precision_at_t * recall_at_t /
       (precision_at_t + recall_at_t + epsilon))
   return math_ops.reduce_max(f1_at_thresholds)
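The same computation in NumPy, with made-up per-threshold counts, shows how the best F1 is selected across candidate thresholds:

import numpy as np

epsilon = 1e-7
tp = np.array([10., 8., 5.])  # true/false positives and false negatives
fp = np.array([5., 2., 1.])   # at each threshold (made-up counts)
fn = np.array([0., 2., 5.])
precision = tp / (epsilon + tp + fp)
recall = tp / (epsilon + tp + fn)
f1 = 2.0 * precision * recall / (precision + recall + epsilon)
print(f1.max())  # the best F1 score over all thresholds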
Example #17
 def _compare_cdf(self, values):
   abs_values = math_ops.abs(values)
   max_value = math_ops.reduce_max(abs_values)
   with self.cached_session():
     variables.global_variables_initializer().run()
     cdf_from_histogram = pruning_utils.compute_cdf_from_histogram(
         abs_values, [0.0, max_value], nbins=pruning_utils._NBINS)
     cdf = pruning_utils.compute_cdf(abs_values, [0.0, max_value])
     self.assertAllEqual(cdf.eval(), cdf_from_histogram.eval())
Example #18
def compute_cdf(values, value_range, **kwargs):
  """Returns the normalized cumulative distribution of the given values tensor.

  Uses tf.while_loop to directly compute the cdf of the values. The number of
  bins for the histogram is fixed at _NBINS=255.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`
    **kwargs: keyword arguments: name

  Returns:
    A 1-D `Tensor` holding normalized cdf of values.

  """
  nbins = _NBINS
  name = kwargs.get('name', None)
  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
    values = ops.convert_to_tensor(values, name='values')
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins_float = np.float32(nbins)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(
        values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

    cdf = array_ops.zeros(nbins)
    i = constant_op.constant(0)

    def loop_cond(loop_count, _):
      return math_ops.less(loop_count, nbins)

    def loop_body(loop_count, cdf):
      temp = math_ops.reduce_sum(
          math_ops.cast(
              math_ops.less_equal(indices, loop_count), dtypes.float32))
      cdf = math_ops.add(
          cdf,
          array_ops.one_hot(
              loop_count, depth=_NBINS, on_value=temp, off_value=0.0))
      return [loop_count + 1, cdf]

    _, cdf = control_flow_ops.while_loop(
        loop_cond, loop_body, [i, cdf], maximum_iterations=nbins)

    return math_ops.div(cdf, math_ops.reduce_max(cdf))
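Counting the indices that are <= loop_count for every bin is just a cumulative histogram, so the while_loop is equivalent to this NumPy sketch (illustrative, not the library code):

import numpy as np

values = np.random.rand(1000).astype(np.float32)
hist, _ = np.histogram(values, bins=255, range=(0.0, 1.0))
cdf = np.cumsum(hist).astype(np.float32)
cdf /= cdf.max()  # normalized CDF, matching compute_cdf's final division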
Example #19
 def testConstraints(self):
   g_constraint = lambda x: x / math_ops.reduce_sum(x)
   b_constraint = lambda x: x / math_ops.reduce_max(x)
   bn = normalization_layers.BatchNormalization(axis=1,
                                                gamma_constraint=g_constraint,
                                                beta_constraint=b_constraint)
   inputs = random_ops.random_uniform((5, 4, 3), seed=1)
   bn(inputs)
   self.assertEqual(bn.gamma_constraint, g_constraint)
   self.assertEqual(bn.beta_constraint, b_constraint)
Example #20
 def testConstraints(self):
   k_constraint = lambda x: x / math_ops.reduce_sum(x)
   b_constraint = lambda x: x / math_ops.reduce_max(x)
   dense = core_layers.Dense(2,
                             kernel_constraint=k_constraint,
                             bias_constraint=b_constraint)
   inputs = random_ops.random_uniform((5, 3), seed=1)
   dense(inputs)
   self.assertEqual(dense.kernel_constraint, k_constraint)
   self.assertEqual(dense.bias_constraint, b_constraint)
Example #21
 def testConstraints(self):
   k_constraint = lambda x: x / math_ops.reduce_sum(x)
   b_constraint = lambda x: x / math_ops.reduce_max(x)
   layer = conv_layers.Conv2DTranspose(2, 3,
                                       kernel_constraint=k_constraint,
                                       bias_constraint=b_constraint)
   inputs = random_ops.random_uniform((5, 3, 3, 5), seed=1)
   layer(inputs)
   self.assertEqual(layer.kernel_constraint, k_constraint)
   self.assertEqual(layer.bias_constraint, b_constraint)
Example #22
def sparse_categorical_accuracy(y_true, y_pred):
  y_true = math_ops.reduce_max(y_true, axis=-1)
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
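A small NumPy check of the same logic (the reduce_max squeezes the trailing label dimension, argmax picks the predicted class):

import numpy as np

y_true = np.array([[1.], [2.]])          # integer labels stored as floats
y_pred = np.array([[0.1, 0.8, 0.1],
                   [0.2, 0.7, 0.1]])
acc = (y_true.max(axis=-1) == y_pred.argmax(axis=-1)).astype(np.float32)
print(acc)  # [1. 0.]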
Example #23
def advanced_softmax(logits, mask=None):
    """ Computes softmax function manually.

    Avoids numeric overflow.

    Args:
        logits: A Tensor. The softmax will apply on the last dimension of it.
        mask: A Tensor with the same shape as `logits`.

    Returns: The softmax results.
    """
    num_shapes = logits.get_shape().ndims
    if mask is not None:
        scores_exp = math_ops.exp(logits - math_ops.reduce_max(logits, axis=num_shapes - 1, keepdims=True)) * mask
    else:
        scores_exp = math_ops.exp(logits - math_ops.reduce_max(logits, axis=num_shapes - 1, keepdims=True))
    scores_sum = math_ops.reduce_sum(scores_exp, axis=num_shapes - 1, keepdims=True)
    x_sm = scores_exp / scores_sum
    return x_sm
Example #24
def crf_decode(potentials, transition_params, sequence_length):
  """Decode the highest scoring sequence of tags in TensorFlow.

  This is a tensor-based implementation of Viterbi decoding.

  Args:
    potentials: A [batch_size, max_seq_len, num_tags] tensor of
              unary potentials.
    transition_params: A [num_tags, num_tags] matrix of
              binary potentials.
    sequence_length: A [batch_size] vector of true sequence lengths.

  Returns:
    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
                Contains the highest scoring tag indices.
    best_score: A [batch_size] vector, containing the score of `decode_tags`.
  """
  # For simplicity, in shape comments, denote:
  # 'batch_size' by 'B', 'max_seq_len' by 'T', 'num_tags' by 'O' (output).
  num_tags = potentials.get_shape()[2].value

  # Computes forward decoding. Get last score and backpointers.
  crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params)
  initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  initial_state = array_ops.squeeze(initial_state, axis=[1])      # [B, O]
  inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])   # [B, T-1, O]
  backpointers, last_score = rnn.dynamic_rnn(
      crf_fwd_cell,
      inputs=inputs,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)             # [B, T - 1, O], [B, O]
  backpointers = gen_array_ops.reverse_sequence(
      backpointers, sequence_length - 1, seq_dim=1)               # [B, T-1, O]

  # Computes backward decoding. Extract tag indices from backpointers.
  crf_bwd_cell = CrfDecodeBackwardRnnCell(num_tags)
  initial_state = math_ops.cast(math_ops.argmax(last_score, axis=1),
                                dtype=dtypes.int32)               # [B]
  initial_state = array_ops.expand_dims(initial_state, axis=-1)   # [B, 1]
  decode_tags, _ = rnn.dynamic_rnn(
      crf_bwd_cell,
      inputs=backpointers,
      sequence_length=sequence_length - 1,
      initial_state=initial_state,
      time_major=False,
      dtype=dtypes.int32)           # [B, T - 1, 1]
  decode_tags = array_ops.squeeze(decode_tags, axis=[2])           # [B, T - 1]
  decode_tags = array_ops.concat([initial_state, decode_tags], axis=1)  # [B, T]
  decode_tags = gen_array_ops.reverse_sequence(
      decode_tags, sequence_length, seq_dim=1)                     # [B, T]

  best_score = math_ops.reduce_max(last_score, axis=1)             # [B]
  return decode_tags, best_score
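The forward/backward structure above mirrors classic Viterbi decoding. A minimal single-sequence NumPy reference (no batching; the helper name is made up) that should agree with crf_decode on one example:

import numpy as np

def viterbi_decode_np(potentials, transition_params):
  # potentials: [T, O] unary scores; transition_params: [O, O].
  trellis = np.zeros_like(potentials)
  backpointers = np.zeros(potentials.shape, dtype=np.int32)
  trellis[0] = potentials[0]
  for t in range(1, potentials.shape[0]):
    scores = trellis[t - 1][:, None] + transition_params
    trellis[t] = potentials[t] + scores.max(axis=0)
    backpointers[t] = scores.argmax(axis=0)
  tags = [int(trellis[-1].argmax())]     # best final tag
  for bp in reversed(backpointers[1:]):  # follow backpointers to the start
    tags.append(int(bp[tags[-1]]))
  return list(reversed(tags)), trellis[-1].max()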
Example #25
def my_rnn(alphabetEnc, cell, inputs, initial_state=None, dtype=None,
           sequence_length=None, scope=None):

  if not isinstance(cell, rnn_cell.RNNCell):
    raise TypeError("cell must be an instance of RNNCell")
  if not isinstance(inputs, list):
    raise TypeError("inputs must be a list")
  if not inputs:
    raise ValueError("inputs must not be empty")

  outputs = []
  with vs.variable_scope(scope or "RNN"):
    fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]
    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(inputs[0])[0]
    if initial_state is not None:
      state = initial_state
    else:
      if not dtype:
        raise ValueError("If no initial_state is provided, dtype must be.")
      state = cell.zero_state(batch_size, dtype)

    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)

    if sequence_length is not None:  # Prepare variables
      zero_output = array_ops.zeros(
          array_ops.pack([batch_size, cell.output_size]), inputs[0].dtype)
      zero_output.set_shape(
          tensor_shape.TensorShape([fixed_batch_size.value, cell.output_size]))
      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)

    for time, input_ in enumerate(inputs):
      if time > 0: vs.get_variable_scope().reuse_variables()
      # pylint: disable=cell-var-from-loop
      call_cell = lambda: cell([input_, alphabetEnc[time]], state)
      # pylint: enable=cell-var-from-loop
      if sequence_length is not None:
        (output, state) = _rnn_step(
            time, sequence_length, min_sequence_length, max_sequence_length,
            zero_output, state, call_cell)
      else:
        (output, state) = call_cell()
      outputs.append(output)

    return (outputs, state)
Example #26
 def testLargeFeed(self):
   server = self._cached_server
   with session.Session(server.target, config=self._useRPCConfig()) as sess:
     feed_val = np.empty([10000, 3000], dtype=np.float32)
     feed_val.fill(0.5)
     p = array_ops.placeholder(dtypes.float32, shape=[10000, 3000])
     min_t = math_ops.reduce_min(p)
     max_t = math_ops.reduce_max(p)
     min_val, max_val = sess.run([min_t, max_t], feed_dict={p: feed_val})
     self.assertEqual(0.5, min_val)
     self.assertEqual(0.5, max_val)
Example #27
def _numerically_stable_global_norm(tensor_list):
  """Compute the global norm of a list of Tensors, with improved stability.

  The global norm computation sometimes overflows due to the intermediate L2
  step. To avoid this, we divide by a cheap-to-compute max over the
  matrix elements.

  Args:
    tensor_list: A list of tensors, or `None`.

  Returns:
    A scalar tensor with the global norm.
  """
  if np.all([x is None for x in tensor_list]):
    return 0.0

  list_max = math_ops.reduce_max([math_ops.reduce_max(math_ops.abs(x)) for x in
                                  tensor_list if x is not None])
  return list_max * clip_ops.global_norm([x / list_max for x in tensor_list
                                          if x is not None])
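The rescaling works because ||x|| = m * ||x / m|| for any positive scalar m, so dividing by the elementwise max keeps the intermediate squares in range. An illustrative NumPy comparison:

import numpy as np

tensors = [np.array([3e200, 4e200]), np.array([0.0])]
naive = np.sqrt(sum((t ** 2).sum() for t in tensors))  # overflows to inf
m = max(np.abs(t).max() for t in tensors)
stable = m * np.sqrt(sum(((t / m) ** 2).sum() for t in tensors))
print(naive, stable)  # inf 5e+200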
Example #28
 def _max_condition_number_to_be_non_singular(self):
   """Return the maximum condition number that we consider nonsingular."""
   with ops.name_scope("max_nonsingular_condition_number"):
     dtype_eps = np.finfo(self.dtype.as_numpy_dtype).eps
     eps = math_ops.cast(
         math_ops.reduce_max([
             100.,
             math_ops.cast(self.range_dimension_tensor(), self.dtype),
             math_ops.cast(self.domain_dimension_tensor(), self.dtype)
         ]), self.dtype) * dtype_eps
     return 1. / eps
Example #29
def _calculate_acceptance_probabilities(init_probs, target_probs):
  """Calculate the per-class acceptance rates.

  Args:
    init_probs: The class probabilities of the data.
    target_probs: The desired class proportion in minibatches.
  Returns:
    A list of the per-class acceptance probabilities.

  This method is based on the following analysis:

  Let F be the probability of a rejection (on any example).
  Let p_i be the proportion of examples in the data in class i (init_probs).
  Let a_i be the rate at which the rejection sampler should *accept* class i.
  Let t_i be the target proportion in the minibatches for class i (target_probs).

  ```
  F = sum_i(p_i * (1-a_i))
    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
  ```

  An example with class `i` will be accepted if `k` rejections occur, then an
  example with class `i` is seen by the rejector, and it is accepted. This can
  be written as follows:

  ```
  t_i = sum_k=0^inf(F^k * p_i * a_i)
      = p_i * a_i / (1 - F)    using geometric series identity, since 0 <= F < 1
      = p_i * a_i / sum_j(p_j * a_j)        using F from above
  ```

  Note that the following constraints hold:
  ```
  0 <= p_i <= 1, sum_i(p_i) = 1
  0 <= a_i <= 1
  0 <= t_i <= 1, sum_i(t_i) = 1
  ```


  A solution for a_i in terms of the other variables is the following:
    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```
  """
  # Make list of t_i / p_i.
  ratio_l = target_probs / init_probs

  # Replace NaNs with 0s.
  ratio_l = math_ops.select(math_ops.is_nan(ratio_l),
                            array_ops.zeros_like(ratio_l),
                            ratio_l)

  # Calculate list of acceptance probabilities.
  max_ratio = math_ops.reduce_max(ratio_l)
  return ratio_l / max_ratio
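Plugging made-up numbers into the closed-form solution:

import numpy as np

init_probs = np.array([0.7, 0.2, 0.1])     # p_i: class mix in the data
target_probs = np.array([1., 1., 1.]) / 3  # t_i: desired minibatch mix
ratio = target_probs / init_probs          # t_i / p_i
accept = ratio / ratio.max()               # a_i = (t_i / p_i) / max_i[t_i / p_i]
print(accept)  # ~[0.143, 0.5, 1.0]: the rarest class is always accepted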
Example #30
 def testConstraints(self):
   d_constraint = lambda x: x / math_ops.reduce_sum(x)
   p_constraint = lambda x: x / math_ops.reduce_sum(x)
   b_constraint = lambda x: x / math_ops.reduce_max(x)
   layer = conv_layers.SeparableConv2D(2, 3,
                                       depthwise_constraint=d_constraint,
                                       pointwise_constraint=p_constraint,
                                       bias_constraint=b_constraint)
   inputs = random_ops.random_uniform((5, 3, 3, 5), seed=1)
   layer(inputs)
   self.assertEqual(layer.depthwise_constraint, d_constraint)
   self.assertEqual(layer.pointwise_constraint, p_constraint)
   self.assertEqual(layer.bias_constraint, b_constraint)
Example #31
def matrix_exponential(input, name=None):  # pylint: disable=redefined-builtin
    r"""Computes the matrix exponential of one or more square matrices.

  exp(A) = \sum_{n=0}^\infty A^n/n!

  The exponential is computed using a combination of the scaling and squaring
  method and the Pade approximation. Details can be found in:
  Nicholas J. Higham, "The scaling and squaring method for the matrix
  exponential revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.

  The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
  form square matrices. The output is a tensor of the same shape as the input
  containing the exponential for all input submatrices `[..., :, :]`.

  Args:
    input: A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`,
      or `complex128` with shape `[..., M, M]`.
    name:  A name to give this `Op` (optional).

  Returns:
    the matrix exponential of the input.

  Raises:
    ValueError: An unsupported type is provided as input.

  @compatibility(scipy)
  Equivalent to scipy.linalg.expm
  @end_compatibility
  """
    with ops.name_scope(name, 'matrix_exponential', [input]):
        matrix = ops.convert_to_tensor(input, name='input')
        if matrix.shape[-2:] == [0, 0]:
            return matrix
        batch_shape = matrix.shape[:-2]
        if not batch_shape.is_fully_defined():
            batch_shape = array_ops.shape(matrix)[:-2]

        # reshaping the batch makes the where statements work better
        matrix = array_ops.reshape(
            matrix,
            array_ops.concat(([-1], array_ops.shape(matrix)[-2:]), axis=0))
        l1_norm = math_ops.reduce_max(math_ops.reduce_sum(
            math_ops.abs(matrix),
            axis=array_ops.size(array_ops.shape(matrix)) - 2),
                                      axis=-1)
        const = lambda x: constant_op.constant(x, l1_norm.dtype)

        def _nest_where(vals, cases):
            assert len(vals) == len(cases) - 1
            if len(vals) == 1:
                return array_ops.where(math_ops.less(l1_norm, const(vals[0])),
                                       cases[0], cases[1])
            else:
                return array_ops.where(math_ops.less(l1_norm, const(vals[0])),
                                       cases[0],
                                       _nest_where(vals[1:], cases[1:]))

        if matrix.dtype in [dtypes.float16, dtypes.float32, dtypes.complex64]:
            maxnorm = const(3.925724783138660)
            squarings = math_ops.maximum(
                math_ops.floor(
                    math_ops.log(l1_norm / maxnorm) /
                    math_ops.log(const(2.0))), 0)
            u3, v3 = _matrix_exp_pade3(matrix)
            u5, v5 = _matrix_exp_pade5(matrix)
            u7, v7 = _matrix_exp_pade7(matrix / math_ops.pow(
                constant_op.constant(2.0, dtype=matrix.dtype),
                math_ops.cast(squarings, matrix.dtype))[..., array_ops.newaxis,
                                                        array_ops.newaxis])
            conds = (4.258730016922831e-001, 1.880152677804762e+000)
            u = _nest_where(conds, (u3, u5, u7))
            v = _nest_where(conds, (v3, v5, v7))
        elif matrix.dtype in [dtypes.float64, dtypes.complex128]:
            maxnorm = const(5.371920351148152)
            squarings = math_ops.maximum(
                math_ops.floor(
                    math_ops.log(l1_norm / maxnorm) /
                    math_ops.log(const(2.0))), 0)
            u3, v3 = _matrix_exp_pade3(matrix)
            u5, v5 = _matrix_exp_pade5(matrix)
            u7, v7 = _matrix_exp_pade7(matrix)
            u9, v9 = _matrix_exp_pade9(matrix)
            u13, v13 = _matrix_exp_pade13(matrix / math_ops.pow(
                constant_op.constant(2.0, dtype=matrix.dtype),
                math_ops.cast(squarings, matrix.dtype))[..., array_ops.newaxis,
                                                        array_ops.newaxis])
            conds = (1.495585217958292e-002, 2.539398330063230e-001,
                     9.504178996162932e-001, 2.097847961257068e+000)
            u = _nest_where(conds, (u3, u5, u7, u9, u13))
            v = _nest_where(conds, (v3, v5, v7, v9, v13))
        else:
            raise ValueError(
                'tf.linalg.expm does not support matrices of type %s' %
                matrix.dtype)
        numer = u + v
        denom = -u + v
        result = linalg_ops.matrix_solve(denom, numer)
        max_squarings = math_ops.reduce_max(squarings)

        i = const(0.0)
        c = lambda i, r: math_ops.less(i, max_squarings)

        def b(i, r):
            return i + 1, array_ops.where(math_ops.less(i, squarings),
                                          math_ops.matmul(r, r), r)

        _, result = control_flow_ops.while_loop(c, b, [i, result])
        if not matrix.shape.is_fully_defined():
            return array_ops.reshape(
                result,
                array_ops.concat((batch_shape, array_ops.shape(result)[-2:]),
                                 axis=0))
        return array_ops.reshape(result,
                                 batch_shape.concatenate(result.shape[-2:]))
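Per the scipy compatibility note in the docstring, the result should match scipy.linalg.expm. A quick check on a rotation generator, where the exponential has a closed form:

import numpy as np
from scipy.linalg import expm

a = np.array([[0.0, 1.0], [-1.0, 0.0]])
# exp(a) rotates by 1 radian: [[cos 1, sin 1], [-sin 1, cos 1]].
print(np.allclose(expm(a), [[np.cos(1), np.sin(1)], [-np.sin(1), np.cos(1)]]))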
Example #32
def ragged_reduce_aggregate(reduce_op,
                            unsorted_segment_op,
                            rt_input,
                            axis,
                            keepdims,
                            separator=None,
                            name=None):
  """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
  tensor is reduced by 1 for each entry in `axis`.  If `axis` is not specified,
  then all dimensions are reduced, and a scalar value is returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
  if not, then reducing multiple axes will return incorrect results.  (In
  particular, reducing multiple axes is currently implemented by reducing the
  axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions.  Must have the same signature and basic behavior as
      `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions.  Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be reduced.
    axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
      `int` (to reduce a single axis), a `list` or `tuple` of `int` (to reduce a
      given set of axes), or a `Tensor` with a constant value.  Must be in the
      range `[0, rt_input.rank)`.
    keepdims: If true, retains reduced dimensions with length 1.
    separator: An optional string. Defaults to None. The separator to use when
      joining. The separator must not be set for non-string data types. (i.e. if
      separator is not None then it uses string ops)
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values.  The returned tensor
    has the same dtype as `data`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
    of the returned tensor is given by subtracting any ragged dimensions
    specified in `axis` from `rt_input.ragged_rank`.
  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
  if not ragged_tensor.is_ragged(rt_input):
    if separator is None:
      return reduce_op(rt_input, axis, keepdims=keepdims, name=name)
    else:
      # When separator is not None, we infer that the dtype is string and
      # reduce_join will be called.
      return reduce_op(
          rt_input, axis, keepdims=keepdims, name=name, separator=separator)

  if isinstance(axis, ops.Tensor):
    axis = tensor_util.constant_value(axis)
    if axis is None:
      raise ValueError('axis must be known at graph construction time.')
    if isinstance(axis, np.ndarray):
      axis = axis.tolist()

  # When reducing all axes, just ignore splits & reduce the inner values.
  if axis is None:
    result = reduce_op(rt_input.flat_values, None, keepdims=keepdims, name=name)
    if keepdims:
      # Expand the result to the input number of dimensions.
      for _ in rt_input.shape[1:]:
        result = array_ops.expand_dims(result, axis=0)
    return result

  with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
    if isinstance(axis, (tuple, list)):
      if not axis:
        return rt_input
      elif len(axis) == 1:
        axis = axis[0]
      else:
        # When reducing multiple axes, as we reduce one at a time (see below),
        # the negative axis has to be converted to positive at the first run
        # as the sort with negative axis will have different orders.
        # See GitHub issue 27497.
        axis = [
            array_ops.get_positive_axis(a, rt_input.shape.ndims, 'axis[%s]' % i,
                                        'rank(input_tensor)')
            for i, a in enumerate(axis)
        ]
        # When reducing multiple axes, just reduce one at a time.  This is less
        # efficient, and only works for associative ops.  (In particular, it
        # does not work for reduce_mean.)  However, reducing multiple axes at
        # once will probably require a nontrivial c++ op.
        axis = sorted(axis)
        inner_reduced = ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                                rt_input, axis[-1], keepdims,
                                                separator)
        return ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                       inner_reduced, axis[:-1], keepdims,
                                       separator)

    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')

    axis = array_ops.get_positive_axis(
        axis, rt_input.shape.ndims, ndims_name='rank(input_tensor)')

    if axis == 0:
      # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
      row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
      num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
      segment_ids = range(row_lengths).values
      result = _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                         segment_ids, num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=0)
      return result
    elif axis == 1:
      # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
      num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
      segment_ids = segment_id_ops.row_splits_to_segment_ids(
          rt_input.row_splits)
      result = _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                         segment_ids, num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=1)
      return result
    else:
      # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
      #     sum_{j} rt_input [i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
      return rt_input.with_values(
          ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                  rt_input.values, axis - 1, keepdims,
                                  separator))
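What axis=0 versus axis=1 means for a ragged input, sketched with plain Python lists (the rows form the ragged dimension):

rows = [[1, 5], [7], [2, 4, 6]]  # a ragged "tensor" with row lengths 2, 1, 3
# axis=0: reduce across rows, position by position (segments of unequal length).
width = max(len(r) for r in rows)
print([max(r[j] for r in rows if len(r) > j) for j in range(width)])  # [7, 5, 6]
# axis=1: reduce within each row.
print([max(r) for r in rows])  # [5, 7, 6]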
Example #33
 def _iter_condition(i, mat_m, _):
     return math_ops.logical_and(
         i < iter_count,
         math_ops.reduce_max(math_ops.abs(mat_m - identity)) > epsilon)
Example #34
def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
    """Calculates the acceptance probabilities and mixing ratio.

  In this case, we assume that we can *either* sample from the original data
  distribution with probability `m`, or sample from a reshaped distribution
  that comes from rejection sampling on the original distribution. This
  rejection sampling is done on a per-class basis, with `a_i` representing the
  probability of accepting data from class `i`.

  This method is based on the following analysis for the reshaped
  distribution:

  Let F be the probability of a rejection (on any example).
  Let p_i be the proportion of examples in the data in class i (init_probs).
  Let a_i be the rate at which the rejection sampler should *accept* class i.
  Let t_i be the target proportion in the minibatches for class i (target_probs).

  ```
  F = sum_i(p_i * (1-a_i))
    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
  ```

  An example with class `i` will be accepted if `k` rejections occur, then an
  example with class `i` is seen by the rejector, and it is accepted. This can
  be written as follows:

  ```
  t_i = sum_k=0^inf(F^k * p_i * a_i)
      = p_i * a_i / (1 - F)    using geometric series identity, since 0 <= F < 1
      = p_i * a_i / sum_j(p_j * a_j)        using F from above
  ```

  Note that the following constraints hold:
  ```
  0 <= p_i <= 1, sum_i(p_i) = 1
  0 <= a_i <= 1
  0 <= t_i <= 1, sum_i(t_i) = 1
  ```

  A solution for a_i in terms of the other variables is the following:
    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```

  If we try to minimize the amount of data rejected, we get the following:

  M_max = max_i [ t_i / p_i ]
  M_min = min_i [ t_i / p_i ]

  The desired probability of accepting data if it comes from class `i`:

  a_i = (t_i/p_i - m) / (M_max - m)

  The desired probability of pulling a data element from the original dataset,
  rather than the filtered one:

  m = M_min

  Args:
    initial_probs: A Tensor of the initial probability distribution, given or
      estimated.
    target_probs: A Tensor of the desired class proportions in minibatches.

  Returns:
    (A 1D Tensor with the per-class acceptance probabilities, the desired
    probability of pulling from the original distribution.)
  """
    ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs)
    max_ratio = math_ops.reduce_max(ratio_l)
    min_ratio = math_ops.reduce_min(ratio_l)

    # Target prob to sample from original distribution.
    m = min_ratio

    # TODO(joelshor): Simplify fraction, if possible.
    a_i = (ratio_l - m) / (max_ratio - m)
    return a_i, m
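Assuming _get_target_to_initial_ratio returns the elementwise ratio t_i / p_i (as the derivation implies), made-up distributions give:

import numpy as np

init_probs = np.array([0.6, 0.3, 0.1])
target_probs = np.array([0.4, 0.4, 0.2])
ratio = target_probs / init_probs    # t_i / p_i
m = ratio.min()                      # chance of sampling the original data
a = (ratio - m) / (ratio.max() - m)  # per-class acceptance probabilities
print(m, a)  # ~0.667 [0. 0.5 1.]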
Example #35
 def _single_seq_fn():
     squeezed_potentials = array_ops.squeeze(potentials, [1])
     decode_tags = array_ops.expand_dims(
         math_ops.argmax(squeezed_potentials, axis=1), 1)
     best_score = math_ops.reduce_max(squeezed_potentials, axis=1)
     return math_ops.cast(decode_tags, dtype=dtypes.int32), best_score
Example #36
def _sample_max(values):
    """Max over sample indices.  In this module this is always [0]."""
    return math_ops.reduce_max(values, reduction_indices=[0])
Example #37
    def _training_examples_and_variables():
      """Returns dictionaries for training examples and variables."""
      batch_size = targets.get_shape()[0]

      # Iterate over all feature columns and create appropriate lists for dense
      # and sparse features as well as dense and sparse weights (variables) for
      # SDCA.
      # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
      # dict as 1-dimensional tensors.
      dense_features, sparse_features, sparse_feature_with_values = [], [], []
      dense_feature_weights = []
      sparse_feature_weights, sparse_feature_with_values_weights = [], []
      for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
        transformed_tensor = features[column]
        if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
          # A real-valued column corresponds to a dense feature in SDCA. A
          # transformed tensor corresponding to a RealValuedColumn should have
          # rank at most 2. In order to be passed to SDCA, its rank needs to be
          # exactly 2 (i.e., its shape should be [batch_size, column.dim]).
          check_rank_op = control_flow_ops.Assert(
              math_ops.less_equal(array_ops.rank(transformed_tensor), 2),
              ['transformed_tensor should have rank at most 2.'])
          # Reshape to [batch_size, dense_column_dimension].
          with ops.control_dependencies([check_rank_op]):
            transformed_tensor = array_ops.reshape(transformed_tensor, [
                array_ops.shape(transformed_tensor)[0], -1
            ])

          dense_features.append(transformed_tensor)
          # For real valued columns, the variables list contains exactly one
          # element.
          dense_feature_weights.append(columns_to_variables[column][0])
        elif isinstance(column, layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
          # A bucketized column corresponds to a sparse feature in SDCA. The
          # bucketized feature is "sparsified" for SDCA by converting it to a
          # SparseFeatureColumn representing the one-hot encoding of the
          # bucketized feature.
          #
          # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
          # bucketized feature column to a dense feature in SDCA. This will
          # likely depend on the number of buckets.
          dense_bucket_tensor = column._to_dnn_input_layer(transformed_tensor)  # pylint: disable=protected-access
          sparse_feature_column = _dense_tensor_to_sparse_feature_column(
              dense_bucket_tensor)
          sparse_feature_with_values.append(sparse_feature_column)
          # If a partitioner was used during variable creation, we will have a
          # list of Variables here larger than 1.
          vars_to_append = columns_to_variables[column][0]
          if len(columns_to_variables[column]) > 1:
            vars_to_append = columns_to_variables[column]
          sparse_feature_with_values_weights.append(vars_to_append)
        elif isinstance(
            column,
            (
                layers.feature_column._WeightedSparseColumn,  # pylint: disable=protected-access
                layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
                layers.feature_column._SparseColumn)):  # pylint: disable=protected-access

          if isinstance(column, layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
            id_tensor = column.id_tensor(transformed_tensor)
            weight_tensor = array_ops.reshape(
                column.weight_tensor(transformed_tensor).values, [-1])
          else:
            id_tensor = transformed_tensor
            weight_tensor = array_ops.ones(
                [array_ops.shape(id_tensor.indices)[0]], dtypes.float32)

          example_ids = array_ops.reshape(id_tensor.indices[:, 0], [-1])

          flat_ids = array_ops.reshape(id_tensor.values, [-1])
          # Prune invalid IDs (< 0) from the flat_ids, example_ids, and
          # weight_tensor.  These can come from looking up an OOV entry in the
          # vocabulary (default value being -1).
          is_id_valid = math_ops.greater_equal(flat_ids, 0)
          flat_ids = array_ops.boolean_mask(flat_ids, is_id_valid)
          example_ids = array_ops.boolean_mask(example_ids, is_id_valid)
          weight_tensor = array_ops.boolean_mask(weight_tensor, is_id_valid)

          projection_length = math_ops.reduce_max(flat_ids) + 1
          # project ids based on example ids so that we can dedup ids that
          # occur multiple times for a single example.
          projected_ids = projection_length * example_ids + flat_ids

          # Remove any redundant ids.
          ids, idx = array_ops.unique(projected_ids)
          # Keep only one example id per duplicated ids.
          example_ids_filtered = math_ops.unsorted_segment_min(
              example_ids, idx,
              array_ops.shape(ids)[0])

          # Reproject ids back to the feature id space.
          reproject_ids = (ids - projection_length * example_ids_filtered)

          weights = array_ops.reshape(
              math_ops.unsorted_segment_sum(weight_tensor, idx,
                                            array_ops.shape(ids)[0]), [-1])
          sparse_feature_with_values.append(
              SparseFeatureColumn(example_ids_filtered, reproject_ids, weights))
          # If a partitioner was used during variable creation, we will have a
          # list of more than one Variable here.
          vars_to_append = columns_to_variables[column][0]
          if len(columns_to_variables[column]) > 1:
            vars_to_append = columns_to_variables[column]
          sparse_feature_with_values_weights.append(vars_to_append)
        else:
          raise ValueError('SDCAOptimizer does not support column type %s.' %
                           type(column).__name__)

      example_weights = array_ops.reshape(
          features[weight_column_name],
          shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
      example_ids = features[self._example_id_column]
      sparse_feature_with_values.extend(sparse_features)
      sparse_feature_with_values_weights.extend(sparse_feature_weights)
      examples = dict(
          sparse_features=sparse_feature_with_values,
          dense_features=dense_features,
          example_labels=math_ops.to_float(
              array_ops.reshape(targets, shape=[-1])),
          example_weights=example_weights,
          example_ids=example_ids)
      sdca_variables = dict(
          sparse_features_weights=sparse_feature_with_values_weights,
          dense_features_weights=dense_feature_weights)
      return examples, sdca_variables
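# A standalone sketch (hypothetical NumPy example, not part of this module) of
# the projection trick above: duplicate (example_id, feature_id) pairs are
# collapsed into a single entry while their weights are summed, mirroring the
# unique/unsorted_segment_sum combination in the code.
import numpy as np

example_ids = np.array([0, 0, 1, 1, 1])
flat_ids = np.array([3, 3, 1, 2, 2])  # (0, 3) and (1, 2) occur twice
weights = np.ones(5)

projection_length = flat_ids.max() + 1
projected_ids = projection_length * example_ids + flat_ids  # [3 3 5 6 6]
ids, idx = np.unique(projected_ids, return_inverse=True)    # ids == [3 5 6]

# Because flat_ids < projection_length, integer division recovers the example
# id (the unsorted_segment_min in the original achieves the same).
example_ids_filtered = ids // projection_length                 # [0 1 1]
reproject_ids = ids - projection_length * example_ids_filtered  # [3 1 2]
summed_weights = np.bincount(idx, weights=weights)              # [2. 1. 2.]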
Exemplo n.º 38
0
def norm(tensor,
         ord='euclidean',
         axis=None,
         keepdims=None,
         name=None,
         keep_dims=None):
    r"""Computes the norm of vectors, matrices, and tensors.

  This function can compute several different vector norms (the 1-norm, the
  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).

  Args:
    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
    ord: Order of the norm. Supported values are 'fro', 'euclidean',
      `1`, `2`, `np.inf` and any positive real number yielding the corresponding
      p-norm. Default is 'euclidean' which is equivalent to Frobenius norm if
      `tensor` is a matrix and equivalent to 2-norm for vectors.
      Some restrictions apply:
        a) The Frobenius norm `fro` is not defined for vectors,
        b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`,
           `2`, `np.inf` are supported.
      See the description of `axis` on how to compute norms for a batch of
      vectors or matrices stored in a tensor.
    axis: If `axis` is `None` (the default), the input is considered a vector
      and a single vector norm is computed over the entire set of values in the
      tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
      `norm(reshape(tensor, [-1]), ord=ord)`.
      If `axis` is a Python integer, the input is considered a batch of vectors,
      and `axis` determines the axis in `tensor` over which to compute vector
      norms.
      If `axis` is a 2-tuple of Python integers it is considered a batch of
      matrices and `axis` determines the axes in `tensor` over which to compute
      a matrix norm.
      Negative indices are supported. Example: If you are passing a tensor that
      can be either a matrix or a batch of matrices at runtime, pass
      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
      computed.
    keepdims: If True, the axes indicated in `axis` are kept with size 1.
      Otherwise, the dimensions in `axis` are removed from the output shape.
    name: The name of the op.
    keep_dims: Deprecated alias for `keepdims`.

  Returns:
    output: A `Tensor` of the same type as tensor, containing the vector or
      matrix norms. If `keepdims` is True then the rank of output is equal to
      the rank of `tensor`. Otherwise, if `axis` is `None` the output is a scalar,
      if `axis` is an integer, the rank of `output` is one less than the rank
      of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
      than the rank of `tensor`.

  Raises:
    ValueError: If `ord` or `axis` is invalid.

  @compatibility(numpy)
  Mostly equivalent to numpy.linalg.norm.
  Not supported: ord <= 0, nuclear norm.
  Other differences:
    a) If axis is `None`, treats the flattened `tensor` as a vector
     regardless of rank.
    b) Explicitly supports 'euclidean' norm as the default, including for
     higher order tensors.
  @end_compatibility
  """
    keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
                                                      'keep_dims', keep_dims)
    if keepdims is None:
        keepdims = False

    is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list))
                      and len(axis) == 2)
    if is_matrix_norm:
        axis = tuple(axis)
        if (not isinstance(axis[0], int) or not isinstance(axis[1], int)
                or axis[0] == axis[1]):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )
        supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf]
        if ord not in supported_matrix_norms:
            raise ValueError(
                "'ord' must be a supported matrix norm in %s, got %s" %
                (supported_matrix_norms, ord))
    else:
        if not (isinstance(axis, int) or axis is None):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )

        supported_vector_norms = ['euclidean', 1, 2, np.inf]
        if (not np.isreal(ord)
                or ord <= 0) and ord not in supported_vector_norms:
            raise ValueError("'ord' must be a supported vector norm, got %s" %
                             ord)
        if axis is not None:
            axis = (axis, )

    with ops.name_scope(name, 'norm', [tensor]):
        tensor = ops.convert_to_tensor(tensor)

        if ord in ['fro', 'euclidean', 2, 2.0]:
            if is_matrix_norm and ord in [2, 2.0]:
                rank = array_ops.rank(tensor)
                positive_axis = functional_ops.map_fn(
                    lambda i: control_flow_ops.cond(
                        i >= 0, lambda: i, lambda: i + rank),
                    ops.convert_to_tensor(axis))
                axes = math_ops.range(rank)
                perm_before = array_ops.concat(
                    [array_ops.setdiff1d(axes, positive_axis)[0],
                     positive_axis],
                    axis=0)
                perm_after = functional_ops.map_fn(
                    lambda i: math_ops.cast(
                        array_ops.squeeze(
                            array_ops.where(math_ops.equal(perm_before, i))),
                        dtype=dtypes.int32),
                    axes)
                permed = array_ops.transpose(tensor, perm=perm_before)
                matrix_2_norm = array_ops.expand_dims(
                    math_ops.reduce_max(
                        math_ops.abs(
                            gen_linalg_ops.svd(permed, compute_uv=False)[0]),
                        axis=-1,
                        keepdims=True),
                    axis=-1)
                result = array_ops.transpose(matrix_2_norm, perm=perm_after)
            else:
                result = math_ops.sqrt(
                    math_ops.reduce_sum(tensor * math_ops.conj(tensor),
                                        axis,
                                        keepdims=True))
        else:
            result = math_ops.abs(tensor)
            if ord == 1:
                sum_axis = None if axis is None else axis[0]
                result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
                if is_matrix_norm:
                    result = math_ops.reduce_max(result,
                                                 axis[-1],
                                                 keepdims=True)
            elif ord == np.inf:
                if is_matrix_norm:
                    result = math_ops.reduce_sum(result,
                                                 axis[1],
                                                 keepdims=True)
                max_axis = None if axis is None else axis[0]
                result = math_ops.reduce_max(result, max_axis, keepdims=True)
            else:
                # General p-norms (positive p only)
                result = math_ops.pow(
                    math_ops.reduce_sum(math_ops.pow(result, ord),
                                        axis,
                                        keepdims=True), 1.0 / ord)
        if not keepdims:
            result = array_ops.squeeze(result, axis)
        return result
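# A brief usage sketch (assuming this function is exposed publicly as
# `tf.norm`; the values are hypothetical). For a 2 x 2 matrix, 'euclidean'
# is the Frobenius norm:
import numpy as np
import tensorflow as tf

x = tf.constant([[3., 4.], [6., 8.]])
print(tf.norm(x))                             # ~11.18: sqrt(9 + 16 + 36 + 64)
print(tf.norm(x, ord=1, axis=1))              # [7., 14.]: per-row 1-norms
print(tf.norm(x, ord=np.inf, axis=[-2, -1]))  # 14.: max absolute row sum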
Exemplo n.º 39
0
def _multidimensional_dynamic_rnn_loop(cell,
                                       inputs,
                                       initial_state,
                                       height,
                                       width,
                                       parallel_iterations,
                                       swap_memory,
                                       sequence_length=None,
                                       dtype=None):
    """Internal implementation of Dynamic RNN.
  Args:
    cell: An instance of RNNCell.
    inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
      tuple of such elements.
    initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
      `cell.state_size` is a tuple, then this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    height: Height of the 2-D grid of inputs; forwarded to `cell.call`.
    width: Width of the 2-D grid of inputs; forwarded to `cell.call`.
    parallel_iterations: Positive Python int.
    swap_memory: A Python boolean
    sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
    dtype: (optional) Expected dtype of output. If not specified, inferred from
      initial_state.
  Returns:
    Tuple `(final_outputs, final_state)`.
    final_outputs:
      A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
      `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
      objects, then this returns a (possibly nested) tuple of Tensors matching
      the corresponding shapes.
    final_state:
      A `Tensor`, or possibly nested tuple of Tensors, matching in length
      and shapes to `initial_state`.
  Raises:
    ValueError: If the input depth cannot be inferred via shape inference
      from the inputs.
  """
    state = initial_state
    assert isinstance(parallel_iterations,
                      int), "parallel_iterations must be int"

    state_size = cell.state_size

    flat_input = nest.flatten(inputs)
    flat_output_size = nest.flatten(cell.output_size)

    # Construct an initial output
    input_shape = array_ops.shape(flat_input[0])
    time_steps = input_shape[0]
    batch_size = _best_effort_input_batch_size(flat_input)

    inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
                             for input_ in flat_input)

    const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

    for shape in inputs_got_shape:
        if not shape[2:].is_fully_defined():
            raise ValueError(
                "Input size (depth of inputs) must be accessible via shape inference,"
                " but saw value None.")
        got_time_steps = shape[0].value
        got_batch_size = shape[1].value
        if const_time_steps != got_time_steps:
            raise ValueError(
                "Time steps is not the same for all the elements in the input in a "
                "batch.")
        if const_batch_size != got_batch_size:
            raise ValueError(
                "Batch_size is not the same for all the elements in the input."
            )

    # Prepare dynamic conditional copying of state & output
    def _create_zero_arrays(size):
        size = _concat(batch_size, size)
        return array_ops.zeros(array_ops.stack(size),
                               _infer_state_dtype(dtype, state))

    flat_zero_output = tuple(
        _create_zero_arrays(output) for output in flat_output_size)
    zero_output = nest.pack_sequence_as(structure=cell.output_size,
                                        flat_sequence=flat_zero_output)

    if sequence_length is not None:
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)

    time = array_ops.constant(0, dtype=dtypes.int32, name="time")

    with ops.name_scope("dynamic_rnn") as scope:
        base_name = scope

    def _create_ta(name, dtype):
        return tensor_array_ops.TensorArray(dtype=dtype,
                                            size=time_steps,
                                            tensor_array_name=base_name + name)

    output_ta = tuple(
        _create_ta("output_%d" % i, _infer_state_dtype(dtype, state))
        for i in range(len(flat_output_size)))
    input_ta = tuple(
        _create_ta("input_%d" % i, flat_input[i].dtype)
        for i in range(len(flat_input)))

    input_ta = tuple(
        ta.unstack(input_) for ta, input_ in zip(input_ta, flat_input))

    def _time_step(time, output_ta_t, state):
        """Take a time step of the dynamic RNN.
    Args:
      time: int32 scalar Tensor.
      output_ta_t: List of `TensorArray`s that represent the output.
      state: nested tuple of vector tensors that represent the state.
    Returns:
      The tuple (time + 1, output_ta_t with updated flow, new_state).
    """

        input_t = tuple(ta.read(time) for ta in input_ta)
        # Restore some shape information
        for input_, shape in zip(input_t, inputs_got_shape):
            input_.set_shape(shape[1:])

        input_t = nest.pack_sequence_as(structure=inputs,
                                        flat_sequence=input_t)
        call_cell = lambda: cell.call(input_t, state, height, width, time)

        if sequence_length is not None:
            (output,
             new_state) = _rnn_step(time=time,
                                    sequence_length=sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    max_sequence_length=max_sequence_length,
                                    zero_output=zero_output,
                                    state=state,
                                    call_cell=call_cell,
                                    state_size=state_size,
                                    skip_conditionals=True)
        else:
            (output, new_state) = call_cell()

        # Pack state if using state tuples
        output = nest.flatten(output)

        output_ta_t = tuple(
            ta.write(time, out) for ta, out in zip(output_ta_t, output))

        return (time + 1, output_ta_t, new_state)

    _, output_final_ta, final_state = control_flow_ops.while_loop(
        cond=lambda time, *_: time < time_steps,
        body=_time_step,
        loop_vars=(time, output_ta, state),
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)

    # Unpack final output if not using output tuples.
    final_outputs = tuple(ta.stack() for ta in output_final_ta)

    # Restore some shape information
    for output, output_size in zip(final_outputs, flat_output_size):
        shape = _concat([const_time_steps, const_batch_size],
                        output_size,
                        static=True)
        output.set_shape(shape)

    final_outputs = nest.pack_sequence_as(structure=cell.output_size,
                                          flat_sequence=final_outputs)

    return (final_outputs, final_state)
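# The core pattern above (a while_loop writing one timestep at a time into a
# TensorArray) can be exercised in isolation. A minimal sketch using the
# public TF API, with a trivial stand-in "cell" that doubles its input
# (hypothetical shapes and values):
import tensorflow as tf

time_steps, batch_size, depth = 5, 2, 3
inputs = tf.random.uniform([time_steps, batch_size, depth])

input_ta = tf.TensorArray(tf.float32, size=time_steps).unstack(inputs)
output_ta = tf.TensorArray(tf.float32, size=time_steps)

def _step(time, out_ta):
    x = input_ta.read(time)
    return time + 1, out_ta.write(time, 2.0 * x)  # stand-in for cell.call(...)

_, final_ta = tf.while_loop(
    cond=lambda t, _: t < time_steps,
    body=_step,
    loop_vars=(tf.constant(0), output_ta))
outputs = final_ta.stack()  # shape [time_steps, batch_size, depth]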
Exemplo n.º 40
0
def confusion_matrix(labels,
                     predictions,
                     num_classes=None,
                     dtype=dtypes.int32,
                     name=None,
                     weights=None):
    """Computes the confusion matrix from predictions and labels.

  Calculate the Confusion Matrix for a pair of prediction and
  label 1-D int arrays.

  The matrix columns represent the prediction labels and the rows represent the
  real labels. The confusion matrix is always a 2-D array of shape `[n, n]`,
  where `n` is the number of valid labels for a given classification task. Both
  prediction and labels must be 1-D arrays of the same shape in order for this
  function to work.

  If `num_classes` is None, then `num_classes` will be set to one plus
  the maximum value in either predictions or labels.
  Class labels are expected to start at 0. E.g., if `num_classes` was
  three, then the possible labels would be `[0, 1, 2]`.

  If `weights` is not `None`, then each prediction contributes its
  corresponding weight to the total value of the confusion matrix cell.

  For example:

  ```python
    tf.contrib.metrics.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
        [[0 0 0 0 0]
         [0 0 1 0 0]
         [0 0 1 0 0]
         [0 0 0 0 0]
         [0 0 0 0 1]]
  ```

  Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`,
  resulting in a 5x5 confusion matrix.

  Args:
    labels: 1-D `Tensor` of real labels for the classification task.
    predictions: 1-D `Tensor` of predictions for a given classification.
    num_classes: The possible number of labels the classification task can
                 have. If this value is not provided, it will be calculated
                 using both predictions and labels array.
    dtype: Data type of the confusion matrix.
    name: Scope name.
    weights: An optional `Tensor` whose shape matches `predictions`.

  Returns:
    A k X k matrix representing the confusion matrix, where k is the number of
    possible labels in the classification task.

  Raises:
    ValueError: If both predictions and labels are not 1-D vectors and have
      mismatched shapes, or if `weights` is not `None` and its shape doesn't
      match `predictions`.
  """
    with ops.name_scope(name, 'confusion_matrix',
                        (predictions, labels, num_classes, weights)) as name:
        labels, predictions = remove_squeezable_dimensions(
            ops.convert_to_tensor(labels, name='labels'),
            ops.convert_to_tensor(predictions, name='predictions'))
        predictions = math_ops.cast(predictions, dtypes.int64)
        labels = math_ops.cast(labels, dtypes.int64)

        # Sanity checks - underflow or overflow can cause memory corruption.
        labels = control_flow_ops.with_dependencies([
            check_ops.assert_non_negative(
                labels, message='`labels` contains negative values')
        ], labels)
        predictions = control_flow_ops.with_dependencies([
            check_ops.assert_non_negative(
                predictions, message='`predictions` contains negative values')
        ], predictions)

        if num_classes is None:
            num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
                                           math_ops.reduce_max(labels)) + 1
        else:
            num_classes_int64 = math_ops.cast(num_classes, dtypes.int64)
            labels = control_flow_ops.with_dependencies([
                check_ops.assert_less(
                    labels, num_classes_int64, message='`labels` out of bound')
            ], labels)
            predictions = control_flow_ops.with_dependencies([
                check_ops.assert_less(predictions,
                                      num_classes_int64,
                                      message='`predictions` out of bound')
            ], predictions)

        if weights is not None:
            predictions.get_shape().assert_is_compatible_with(
                weights.get_shape())
            weights = math_ops.cast(weights, dtype)

        shape = array_ops.stack([num_classes, num_classes])
        indices = array_ops.transpose(array_ops.stack([labels, predictions]))
        values = (array_ops.ones_like(predictions, dtype)
                  if weights is None else weights)
        cm_sparse = sparse_tensor.SparseTensor(
            indices=indices,
            values=values,
            dense_shape=math_ops.to_int64(shape))
        zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)

        return sparse_ops.sparse_add(zero_matrix, cm_sparse)
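# A weighted call, following the docstring above: each prediction contributes
# its weight to its cell. A sketch assuming the public re-export
# `tf.math.confusion_matrix` of this logic (hypothetical values):
import tensorflow as tf

cm = tf.math.confusion_matrix([1, 2, 4], [2, 2, 4],
                              weights=[1., 2., 3.],
                              dtype=tf.float32)
# cm[1, 2] == 1., cm[2, 2] == 2., cm[4, 4] == 3.; every other cell is 0.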
Exemplo n.º 41
0
    def _call_cell(self, inputs, initial_cell_state, initial_output, dtype,
                   sequence_length):
        """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      initial_cell_state: initial value for cell state, shape `[batch_size,
        self._num_units]`
      initial_output: initial value of cell output, shape `[batch_size,
        self._num_units]`
      dtype: The data type for the initial state and expected output.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len)` or None.

    Returns:
      A pair containing:

      - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
                         output_size]`
      - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
                    output_size]`
    """

        inputs_shape = inputs.get_shape().with_rank(3)
        time_len = inputs_shape[0].value
        if time_len is None:
            time_len = array_ops.shape(inputs)[0]
        input_size = inputs_shape[2].value
        w = vs.get_variable(
            "weights", [input_size + self._num_units, self._num_units * 4],
            dtype=dtype)
        b = vs.get_variable("biases", [w.get_shape().with_rank(2)[1]],
                            initializer=init_ops.constant_initializer(0.0),
                            dtype=dtype)
        if self._use_peephole:
            wci = vs.get_variable("w_i_diag", [self._num_units], dtype=dtype)
            wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype)
            wcf = vs.get_variable("w_f_diag", [self._num_units], dtype=dtype)
        else:
            wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype)

        if sequence_length is None:
            max_seq_len = time_len
        else:
            max_seq_len = math_ops.to_int64(
                math_ops.reduce_max(sequence_length))

        _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm(
            seq_len_max=max_seq_len,
            x=inputs,
            cs_prev=initial_cell_state,
            h_prev=initial_output,
            w=w,
            wci=wci,
            wco=wco,
            wcf=wcf,
            b=b,
            forget_bias=self._forget_bias,
            cell_clip=self._cell_clip,
            use_peephole=self._use_peephole)
        return cs, h
Exemplo n.º 42
0
def LastValueQuantize(inputs,
                      per_channel=False,
                      init_min=-6.0,
                      init_max=6.0,
                      vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES,
                      name_prefix='LastValueQuant',
                      reuse=None,
                      is_training=True,
                      num_bits=8,
                      narrow_range=False):
    """Adds a layer that collects quantization ranges as last input ranges.

  LastValueQuantize creates variables called 'min' and 'max', representing the
  interval used for quantization and clamping.

  Args:
    inputs: a tensor containing values to be quantized.
    per_channel: (Optional) a boolean specifying whether to use different
      quantization ranges per output channel.
    init_min: a float scalar, the initial value for variable min.
    init_max: a float scalar, the initial value for variable max.
    vars_collection: (Optional) collection where to store variables for
      quantization interval ends.
    name_prefix: name_prefix for created nodes.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
  Returns:
    a tensor containing quantized values.
  """
    with variable_scope.variable_scope(None,
                                       default_name=name_prefix,
                                       values=[inputs],
                                       reuse=reuse) as scope:
        scope.set_partitioner(None)
        input_shape = inputs.get_shape()
        input_dim = len(input_shape)
        if per_channel:
            # Only support quantizing 1-, 2- and 4-dimensional tensors.
            assert input_dim in [1, 2, 4], (
                'Expected 1D, 2D or 4D input, was: %s in scope: %s' %
                (input_shape, name_prefix))
            min_max_shape = [input_shape[-1]]
        else:
            min_max_shape = []

        min_var = model_variable(
            'min',
            shape=min_max_shape,
            initializer=init_ops.constant_initializer(init_min),
            collections=[vars_collection],
            trainable=False)
        max_var = model_variable(
            'max',
            shape=min_max_shape,
            initializer=init_ops.constant_initializer(init_max),
            collections=[vars_collection],
            trainable=False)
        if not is_training:
            return _FakeQuantWithMinMaxVars(inputs,
                                            min_var,
                                            max_var,
                                            per_channel=per_channel,
                                            num_bits=num_bits,
                                            narrow_range=narrow_range)

        if per_channel:
            if input_dim == 2:
                reduce_dims = [0]
            elif input_dim == 4:
                reduce_dims = [0, 1, 2]

        if per_channel:
            if input_dim >= 2:
                batch_min = math_ops.reduce_min(inputs,
                                                reduction_indices=reduce_dims,
                                                name='BatchMin')
            else:
                batch_min = inputs
        else:
            batch_min = math_ops.reduce_min(inputs, name='BatchMin')
        # TFLite requires that 0.0 is always in the [min; max] range.
        batch_min = math_ops.minimum(batch_min, 0.0)
        assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast')

        if per_channel:
            if input_dim >= 2:
                batch_max = math_ops.reduce_max(inputs,
                                                reduction_indices=reduce_dims,
                                                name='BatchMax')
            else:
                batch_max = inputs
        else:
            batch_max = math_ops.reduce_max(inputs, name='BatchMax')
        # TFLite requires that 0.0 is always in the [min; max] range.
        batch_max = math_ops.maximum(batch_max, 0.0)
        assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast')

        return _FakeQuantWithMinMaxVars(inputs,
                                        assign_min,
                                        assign_max,
                                        per_channel=per_channel,
                                        num_bits=num_bits,
                                        narrow_range=narrow_range)
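# A pure-NumPy sketch of the last-value range update above (hypothetical
# values): on each training step the min/max variables are simply overwritten
# with the current batch extrema, with 0.0 forced into the range as the
# comments note.
import numpy as np

batch = np.array([-2.5, 0.3, 4.1])
batch_min = min(batch.min(), 0.0)  # -2.5
batch_max = max(batch.max(), 0.0)  #  4.1
# min_var <- batch_min and max_var <- batch_max via the assign ops above.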
Exemplo n.º 43
0
def _InsertCalibOp(context,
                   name,
                   producer,
                   consumers,
                   vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
                   producer_scope=None,
                   consumer_scope=None):
    """Inserts calibration ops between a producer op and (multiple) consumer ops.
  Args:
    context: Context where producer and consumer operations are nested.
    name: Name for the new calibration op within the context.
    producer: Producer operation of the pairs where calibration will be
      inserted.
    consumers: Consumer operations of the pairs.
    producer_scope: The restriction of producer scope. If not None, the new op
      will be inserted only when the producer is in this scope.
    consumer_scope: The restriction of consumer scope. If not None, the new op
      will be inserted only when all the consumers are in this scope.
  Raises:
    ValueError: When producer operation is not directly connected to the
      consumer operation.
  """
    if producer_scope and not producer.name.startswith(producer_scope):
        logging.info(
            '_InsertCalibOp ignores context="%s" name="%s" '
            'because producer "%s" is not in scope "%s"', context, name,
            producer.name, producer_scope)
        return

    if consumer_scope:
        consumers_in_scope = []
        for consumer in consumers:
            if consumer.name.startswith(consumer_scope):
                consumers_in_scope.append(consumer)
            else:
                logging.info(
                    '_InsertCalibOp context="%s" name="%s" ignores '
                    'consumer "%s" because it is not in scope "%s"', context,
                    name, consumer.name, consumer_scope)
                return
        consumers = consumers_in_scope

    name_prefix = _AddContextToName(context, name)

    name_scope = ops.get_name_scope()
    if name_scope:
        name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/')

    inputs = producer.outputs[0]
    # Prevent ops from being modified multiple times. Bypass ops can sometimes
    # overlap between multiple matches, so we need to ensure that we don't
    # add duplicate calibration operations.
    #if _FollowedByFakeQuant(inputs):
    #  return

    with variable_scope.variable_scope(None,
                                       default_name=name_prefix,
                                       values=[inputs]) as scope:
        # Currently no per channel.
        min_max_shape = []
        vars_collections = [vars_collection] if vars_collection else []
        min_var = _ModelVariable('min',
                                 shape=min_max_shape,
                                 initializer=init_ops.constant_initializer(
                                     float('inf')),
                                 collections=vars_collections,
                                 trainable=False)
        max_var = _ModelVariable(
            'max',
            shape=min_max_shape,
            initializer=init_ops.constant_initializer(-float('inf')),
            collections=vars_collections,
            trainable=False)
        batch_min = math_ops.reduce_min(inputs, name='BatchMin')
        batch_max = math_ops.reduce_max(inputs, name='BatchMax')

        range_min = math_ops.minimum(batch_min,
                                     min_var,
                                     name=name_prefix + '/range_min')
        range_max = math_ops.maximum(batch_max,
                                     max_var,
                                     name=name_prefix + '/range_max')

    return range_min, range_max
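# Unlike LastValueQuantize above, the calibration op keeps a running range
# across batches. A NumPy sketch of the range_min/range_max aggregation
# (hypothetical batches; the returned tensors are presumably assigned back to
# min_var/max_var by the caller):
import numpy as np

min_var, max_var = np.inf, -np.inf
for batch in (np.array([0.5, 2.0]), np.array([-1.0, 1.5])):
    min_var = min(batch.min(), min_var)  # mirrors math_ops.minimum(...)
    max_var = max(batch.max(), max_var)  # mirrors math_ops.maximum(...)
# After both batches: min_var == -1.0, max_var == 2.0.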
Exemplo n.º 44
0
def pinv(a, rcond=None, validate_args=False, name=None):
    """Compute the Moore-Penrose pseudo-inverse of one or more matrices.

  Calculate the [generalized inverse of a matrix](
  https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_inverse) using its
  singular-value decomposition (SVD) and including all large singular values.

  The pseudo-inverse of a matrix `A`, is defined as: 'the matrix that 'solves'
  [the least-squares problem] `A @ x = b`,' i.e., if `x_hat` is a solution, then
  `A_pinv` is the matrix such that `x_hat = A_pinv @ b`. It can be shown that if
  `U @ Sigma @ V.T = A` is the singular value decomposition of `A`, then
  `A_pinv = V @ inv(Sigma) @ U^T`. [(Strang, 1980)][1]

  This function is analogous to [`numpy.linalg.pinv`](
  https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.pinv.html).
  It differs only in default value of `rcond`. In `numpy.linalg.pinv`, the
  default `rcond` is `1e-15`. Here the default is
  `10. * max(num_rows, num_cols) * np.finfo(dtype).eps`.

  Args:
    a: (Batch of) `float`-like matrix-shaped `Tensor`(s) which are to be
      pseudo-inverted.
    rcond: `Tensor` of small singular value cutoffs.  Singular values smaller
      (in modulus) than `rcond` * largest_singular_value (again, in modulus) are
      set to zero. Must broadcast against `tf.shape(a)[:-2]`.
      Default value: `10. * max(num_rows, num_cols) * np.finfo(a.dtype).eps`.
    validate_args: When `True`, additional assertions might be embedded in the
      graph.
      Default value: `False` (i.e., no graph assertions are added).
    name: Python `str` prefixed to ops created by this function.
      Default value: 'pinv'.

  Returns:
    a_pinv: (Batch of) pseudo-inverse of input `a`. Has same shape as `a` except
      rightmost two dimensions are transposed.

  Raises:
    TypeError: if input `a` does not have `float`-like `dtype`.
    ValueError: if input `a` has fewer than 2 dimensions.

  #### Examples

  ```python
  import tensorflow as tf
  import tensorflow_probability as tfp

  a = tf.constant([[1.,  0.4,  0.5],
                   [0.4, 0.2,  0.25],
                   [0.5, 0.25, 0.35]])
  tf.matmul(tf.linalg.pinv(a), a)
  # ==> array([[1., 0., 0.],
               [0., 1., 0.],
               [0., 0., 1.]], dtype=float32)

  a = tf.constant([[1.,  0.4,  0.5,  1.],
                   [0.4, 0.2,  0.25, 2.],
                   [0.5, 0.25, 0.35, 3.]])
  tf.matmul(tf.linalg.pinv(a), a)
  # ==> array([[ 0.76,  0.37,  0.21, -0.02],
               [ 0.37,  0.43, -0.33,  0.02],
               [ 0.21, -0.33,  0.81,  0.01],
               [-0.02,  0.02,  0.01,  1.  ]], dtype=float32)
  ```

  #### References

  [1]: G. Strang. 'Linear Algebra and Its Applications, 2nd Ed.' Academic Press,
       Inc., 1980, pp. 139-142.
  """
    with ops.name_scope(name or 'pinv'):
        a = ops.convert_to_tensor(a, name='a')

        assertions = _maybe_validate_matrix(a, validate_args)
        if assertions:
            with ops.control_dependencies(assertions):
                a = array_ops.identity(a)

        dtype = a.dtype.as_numpy_dtype

        if rcond is None:

            def get_dim_size(dim):
                dim_val = tensor_shape.dimension_value(a.shape[dim])
                if dim_val is not None:
                    return dim_val
                return array_ops.shape(a)[dim]

            num_rows = get_dim_size(-2)
            num_cols = get_dim_size(-1)
            if isinstance(num_rows, int) and isinstance(num_cols, int):
                max_rows_cols = float(max(num_rows, num_cols))
            else:
                max_rows_cols = math_ops.cast(
                    math_ops.maximum(num_rows, num_cols), dtype)
            rcond = 10. * max_rows_cols * np.finfo(dtype).eps

        rcond = ops.convert_to_tensor(rcond, dtype=dtype, name='rcond')

        # Calculate pseudo inverse via SVD.
        # Note: if a is Hermitian then u == v. (We might observe additional
        # performance by explicitly setting `v = u` in such cases.)
        [
            singular_values,  # Sigma
            left_singular_vectors,  # U
            right_singular_vectors,  # V
        ] = svd(a, full_matrices=False, compute_uv=True)

        # Saturate small singular values to inf. This has the effect of making
        # `1. / s = 0.` while not resulting in `NaN` gradients.
        cutoff = rcond * math_ops.reduce_max(singular_values, axis=-1)
        singular_values = array_ops.where_v2(
            singular_values > array_ops.expand_dims_v2(cutoff, -1),
            singular_values, np.array(np.inf, dtype))

        # By the definition of the SVD, `a == u @ s @ v^H`, and the pseudo-inverse
        # is defined as `pinv(a) == v @ inv(s) @ u^H`.
        a_pinv = math_ops.matmul(right_singular_vectors /
                                 array_ops.expand_dims_v2(singular_values, -2),
                                 left_singular_vectors,
                                 adjoint_b=True)

        if a.shape is not None and a.shape.rank is not None:
            a_pinv.set_shape(a.shape[:-2].concatenate(
                [a.shape[-1], a.shape[-2]]))

        return a_pinv
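# The singular-value saturation step can be checked in isolation. A NumPy
# sketch of the cutoff logic above (hypothetical values):
import numpy as np

s = np.array([5.0, 1.0, 1e-9])
rcond = 1e-6
cutoff = rcond * s.max()                 # 5e-6
s_sat = np.where(s > cutoff, s, np.inf)  # tiny values -> inf, so 1/s -> 0
# 1. / s_sat == [0.2, 1.0, 0.0]: no NaNs, and the tiny direction is dropped.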
Exemplo n.º 45
0
    def _call_cell(self,
                   inputs,
                   initial_cell_state=None,
                   initial_output=None,
                   dtype=None,
                   sequence_length=None):
        """Run this LSTM on inputs, starting from the given state.

    Args:
      inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`
      initial_cell_state: initial value for cell state, shape `[batch_size,
        self._num_units]`
      initial_output: initial value of cell output, shape `[batch_size,
        self._num_units]`
      dtype: The data type for the initial state and expected output.
      sequence_length: Specifies the length of each sequence in inputs. An
        `int32` or `int64` vector (tensor) size `[batch_size]`, values in `[0,
        time_len)` or None.

    Returns:
      A pair containing:

      - Cell state (cs): A `3-D` tensor of shape `[time_len, batch_size,
                         output_size]`
      - Output (h): A `3-D` tensor of shape `[time_len, batch_size,
                    output_size]`
    """

        inputs_shape = inputs.get_shape().with_rank(3)
        time_len = inputs_shape.dims[0].value
        if time_len is None:
            time_len = array_ops.shape(inputs)[0]

        if self._use_peephole:
            wci = self._w_i_diag
            wco = self._w_o_diag
            wcf = self._w_f_diag
        else:
            wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype)

        if sequence_length is None:
            max_seq_len = math_ops.to_int64(time_len)
        else:
            max_seq_len = math_ops.to_int64(
                math_ops.reduce_max(sequence_length))

        _, cs, _, _, _, _, h = _lstm_ops_so.block_lstm_fused_our(
            seq_len_max=max_seq_len,
            x=inputs,
            cs_prev=initial_cell_state,
            h_prev=initial_output,
            w=self._kernel,
            wci=wci,
            wcf=wcf,
            wco=wco,
            b=self._bias,
            group_size_attr=self._group_size,
            forget_bias=self._forget_bias,
            cell_clip=self._cell_clip,
            use_peephole=self._use_peephole)
        return cs, h
Exemplo n.º 46
0
def _embedding_lookup_with_distributed_aggregation(params,
                                                   ids,
                                                   partition_strategy="mod",
                                                   name=None,
                                                   max_norm=None,
                                                   weights=None,
                                                   idx=None,
                                                   segment_ids=None):
    """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation."""
    if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
        raise ValueError("Need at least one param")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]

    def maybe_normalize(x):
        if max_norm is not None:
            if x.get_shape().ndims is not None:
                ndims = x.get_shape().ndims
            else:
                ndims = array_ops.size(array_ops.shape(x))
            return clip_ops.clip_by_norm(x,
                                         max_norm,
                                         axes=list(range(1, ndims)))
        return x

    with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation",
                        params + [ids]) as name:
        np = len(params)  # Number of partitions
        # Preserve the resource variable status to avoid accidental dense reads.
        if not any(
                isinstance(p, resource_variable_ops.ResourceVariable)
                for p in params):
            params = ops.convert_n_to_tensor_or_indexed_slices(params,
                                                               name="params")
        if np == 1:
            with ops.colocate_with(params[0]):
                ret = maybe_normalize(_do_gather(params[0], ids))
                ignore_weights = weights is None
                if not ignore_weights:
                    if weights.dtype != ret.dtype:
                        weights = math_ops.cast(weights, ret.dtype)
                    # Reshape to allow broadcast
                    ones = array_ops.fill(
                        array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1)
                    bcast_weights_shape = array_ops.concat(
                        [array_ops.shape(weights), ones], 0)
                    orig_weights_shape = weights.get_shape()
                    weights = array_ops.reshape(weights, bcast_weights_shape)
                    # Set weights shape after reshape
                    if ret.get_shape().ndims is not None:
                        weights.set_shape(
                            orig_weights_shape.concatenate(
                                [1 for _ in range(ret.get_shape().ndims - 1)]))
                    ret *= weights
                    return math_ops.segment_sum(ret, segment_ids, name=name)
                else:
                    return math_ops.sparse_segment_sum(ret,
                                                       idx,
                                                       segment_ids,
                                                       name=name)
        else:
            ids = ops.convert_to_tensor(ids, name="ids")
            flat_ids = array_ops.reshape(ids, [-1])
            original_indices = math_ops.range(array_ops.size(flat_ids))

            # Create p_assignments and set new_ids depending on the strategy.
            if partition_strategy == "mod":
                p_assignments = flat_ids % np
                new_ids = flat_ids // np
            elif partition_strategy == "div":
                # Compute num_total_ids as the sum of dim-0 of params, then assign to
                # partitions based on a constant number of ids per partition. Optimize
                # if we already know the full shape statically.
                dim_0_size = params[0].get_shape()[0]
                for p in xrange(1, np):
                    dim_0_size += params[p].get_shape()[0]
                if dim_0_size.value:
                    num_total_ids = constant_op.constant(
                        dim_0_size.value, flat_ids.dtype)
                else:
                    dim_0_sizes = []
                    for p in xrange(np):
                        if params[p].get_shape()[0].value is not None:
                            dim_0_sizes.append(params[p].get_shape()[0].value)
                        else:
                            with ops.colocate_with(params[p]):
                                dim_0_sizes.append(
                                    array_ops.shape(params[p])[0])
                    num_total_ids = math_ops.reduce_sum(
                        math_ops.cast(array_ops.stack(dim_0_sizes),
                                      flat_ids.dtype))
                ids_per_partition = num_total_ids // np
                extras = num_total_ids % np

                p_assignments = math_ops.maximum(
                    flat_ids // (ids_per_partition + 1),
                    (flat_ids - extras) // ids_per_partition)

                # Emulate a conditional using a boolean indicator tensor
                is_in_first_extras_partitions = math_ops.cast(
                    p_assignments < extras, flat_ids.dtype)
                new_ids = (is_in_first_extras_partitions *
                           (flat_ids % (ids_per_partition + 1)) +
                           (1 - is_in_first_extras_partitions) *
                           ((flat_ids - extras) % ids_per_partition))
            else:
                raise ValueError("Unrecognized partition strategy: " +
                                 partition_strategy)

            # Cast partition assignments to int32 for use in dynamic_partition.
            # There really should not be more than 2^32 partitions.
            p_assignments = math_ops.cast(p_assignments, dtypes.int32)
            # Partition list of ids based on assignments into np separate lists
            gather_ids = data_flow_ops.dynamic_partition(
                new_ids, p_assignments, np)
            # Similarly, partition the original indices.
            pindices = data_flow_ops.dynamic_partition(original_indices,
                                                       p_assignments, np)
            # Do np separate lookups, finding embeddings for plist[p] in params[p]
            partitioned_result = []
            for p in xrange(np):
                with ops.colocate_with(params[p]):
                    partitioned_result.append(
                        _do_gather(params[p], gather_ids[p]))

            ignore_weights = weights is None
            if not ignore_weights:
                # Partition weights according to pindices.
                partitioned_weight = []
                for p in xrange(np):
                    partitioned_weight.append(
                        array_ops.gather(weights, pindices[p]))
            # Reshape each partition result.
            element_shape = params[0].get_shape()[1:]
            for p in params[1:]:
                element_shape = element_shape.merge_with(p.get_shape()[1:])
            if element_shape.is_fully_defined():
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = array_ops.reshape(
                            partitioned_result[p],
                            array_ops.concat(
                                [array_ops.shape(pindices[p]), element_shape],
                                0))
            else:
                with ops.colocate_with(params[0]):
                    params_shape = array_ops.shape(params[0])
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = array_ops.reshape(
                            partitioned_result[p],
                            array_ops.concat([
                                array_ops.shape(pindices[p]),
                                array_ops.slice(params_shape, [1], [-1])
                            ], 0))
            # Normalize each partition result.
            for p in xrange(np):
                with ops.colocate_with(params[p]):
                    partitioned_result[p] = maybe_normalize(
                        partitioned_result[p])
            if not ignore_weights:
                # Multiply each partition result with partition weights.
                for p in xrange(np):
                    with ops.colocate_with(params[p]):
                        if partitioned_weight[p].dtype != partitioned_result[
                                p].dtype:
                            partitioned_weight[p] = math_ops.cast(
                                partitioned_weight[p],
                                partitioned_result[p].dtype)
                        # Reshape partition weights.
                        ones = array_ops.fill(
                            array_ops.expand_dims(
                                array_ops.rank(partitioned_result[p]) - 1, 0),
                            1)
                        bcast_weights_shape = array_ops.concat(
                            [array_ops.shape(partitioned_weight[p]), ones], 0)
                        orig_weights_shape = partitioned_weight[p].get_shape()
                        partitioned_weight[p] = array_ops.reshape(
                            partitioned_weight[p], bcast_weights_shape)
                        if partitioned_result[p].get_shape().ndims is not None:
                            partitioned_weight[p].set_shape(
                                orig_weights_shape.concatenate([
                                    1 for _ in range(partitioned_result[p].
                                                     get_shape().ndims - 1)
                                ]))
                        partitioned_result[p] *= partitioned_weight[p]
            partitioned_segment_ids = []
            for p in xrange(np):
                if not ignore_weights:
                    # Partition segment_ids according to pindices.
                    p_segment_ids = array_ops.gather(segment_ids, pindices[p])
                    # Number the p_segment_ids to meet segment_sum's requirements. Note
                    # that unique_p_segment_ids contains unique segment ids of this
                    # partition and these ids' order is unchanged.
                    unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
                        p_segment_ids)
                    partitioned_segment_ids.append(unique_p_segment_ids)
                    # segment_sum this partition's result.
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = math_ops.segment_sum(
                            partitioned_result[p], unique_p_segment_idx)
                else:
                    # When ignoring weights, we need to get the indices of
                    # elements in idx and segment_ids.
                    _, exclude_idx = array_ops.setdiff1d(idx, pindices[p])
                    all_idx = math_ops.range(array_ops.shape(idx)[0])
                    _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx)
                    # Gather segment_ids and idx according to these indices.
                    p_segment_ids = array_ops.gather(segment_ids, include_idx)
                    p_idx = array_ops.gather(idx, include_idx)
                    # Number the p_segment_ids, same as the weighted case above.
                    unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
                        p_segment_ids)
                    _, unique_p_idx_idx = array_ops.unique(p_idx)
                    partitioned_segment_ids.append(unique_p_segment_ids)
                    with ops.colocate_with(params[p]):
                        partitioned_result[p] = math_ops.sparse_segment_sum(
                            partitioned_result[p], unique_p_idx_idx,
                            unique_p_segment_idx)
            # Concat each partition's segment_ids and result for final segment_sum.
            concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0)
            concat_partitioned_result = array_ops.concat(partitioned_result, 0)
            return math_ops.unsorted_segment_sum(
                concat_partitioned_result,
                concat_segment_ids,
                math_ops.reduce_max(concat_segment_ids) + 1,
                name=name)
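# The two partition strategies can be illustrated with a NumPy sketch
# (hypothetical sizes): "mod" assigns ids round-robin, while "div" assigns
# contiguous blocks, giving the first `extras` partitions one extra id each.
import numpy as np

num_total_ids, num_parts = 10, 3
flat_ids = np.arange(num_total_ids)

mod_assignments = flat_ids % num_parts          # [0 1 2 0 1 2 0 1 2 0]

ids_per_partition = num_total_ids // num_parts  # 3
extras = num_total_ids % num_parts              # 1
div_assignments = np.maximum(
    flat_ids // (ids_per_partition + 1),
    (flat_ids - extras) // ids_per_partition)   # [0 0 0 0 1 1 1 2 2 2]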
Exemplo n.º 47
0
def categorical_hinge(y_true, y_pred):
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    pos = math_ops.reduce_sum(y_true * y_pred, axis=-1)
    neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1)
    return math_ops.maximum(0., neg - pos + 1.)
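# A quick numeric check of the hinge above with a one-hot target (hypothetical
# values): pos = 0.7, neg = max(0.1, 0.2) = 0.2, so the loss is
# max(0., 0.2 - 0.7 + 1.) = 0.5.
#
#   categorical_hinge([[0., 1., 0.]], [[0.1, 0.7, 0.2]])  # ==> [0.5]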
Exemplo n.º 48
0
  def testMaxGradient(self):
    inputs = constant_op.constant([1.0], dtype=dtypes.float32)
    outputs = math_ops.reduce_max(array_ops.concat([inputs, inputs], 0))
    with self.cached_session():
      error = gradient_checker.compute_gradient_error(inputs, [1], outputs, [])
      self.assertLess(error, 1e-4)
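# The test above exercises a subtlety of reduce_max: its gradient is divided
# evenly among tied maxima. A public-API sketch (hypothetical values):
import tensorflow as tf

x = tf.constant([1.0])
with tf.GradientTape() as tape:
    tape.watch(x)
    y = tf.reduce_max(tf.concat([x, x], 0))
print(tape.gradient(y, x))  # ==> [1.0] (0.5 from each tied copy of x)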
Exemplo n.º 49
0
def tpu_fn(x, y):
    a = x + 7.0
    b = y * 2.0
    c, d, e = tpu.outside_compilation(outside_fn, a, b)
    return (math_ops.reduce_max(c) + math_ops.reduce_min(d) +
            math_ops.reduce_sum(e))
Exemplo n.º 50
0
def PPD_ADAGRAD(objective,
                stage_idx,
                T0,
                eta,
                W,
                W0,
                a,
                b,
                a0,
                b0,
                alpha,
                gamma,
                factor=1.2):
    update_ops = []

    grad_w = tf.gradients(objective, W)
    grad_a = tf.gradients(objective, a)
    grad_b = tf.gradients(objective, b)
    grad_alpha = tf.gradients(objective, alpha)[0]

    grad_v = grad_w + grad_a + grad_b + [-grad_alpha]
    V = W + [a, b] + [alpha]
    V0 = W0 + [a0, b0] + [0]
    d = np.sum([np.prod(v.get_shape().as_list()) for v in W])

    epsilon = 0.5
    accumulators = [
        tf.Variable(tf.zeros(w.get_shape().as_list()),
                    dtype=tf.float32,
                    name='acc') for w in V
    ]  #[K.zeros(w.get_shape().as_list()) for w in V] #
    grad_accumulators = [
        tf.Variable(tf.zeros(w.get_shape().as_list()),
                    dtype=tf.float32,
                    name='grad_acc') for w in V
    ]
    M_s = T0 * math_ops.sqrt(factor**(stage_idx - 1))
    max_i = tf.Variable(0, dtype=tf.float32, name='max_i')
    sum_gt = tf.Variable(0, dtype=tf.float32, name='sum_gt')

    update_ops_bn = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops_bn):

        count = 0
        for p, g, a, g_a, p0 in zip(V, grad_v, accumulators, grad_accumulators,
                                    V0):

            if count != len(V) - 1:
                new_g = g + (1 / gamma) * (p - p0)
            else:
                new_g = g

            new_a = a + math_ops.square(new_g)
            update_ops.append(a.assign(new_a))

            new_g_a = g_a + new_g  # gradient accumulators
            update_ops.append(g_a.assign(new_g_a))

            tmp_max_i = math_ops.maximum(max_i,
                                         K.sqrt(math_ops.reduce_max(new_a)))
            tmp_sum_gt = K.sum(K.sqrt(new_a)) + sum_gt

            update_ops.append(max_i.assign(tmp_max_i))
            update_ops.append(sum_gt.assign(tmp_sum_gt))

            new_p = -eta * (new_g_a) / (K.sqrt(new_a) + epsilon) + p0
            update_ops.append(p.assign(new_p))

            count += 1

    return_value = M_s * math_ops.sqrt((max_i + epsilon) * (sum_gt / (d + 3)))
    return update_ops, return_value, accumulators, grad_accumulators, max_i, sum_gt, M_s
Exemplo n.º 51
0
def vectorized_map(fn, elems, fallback_to_while_loop=True):
  """Parallel map on the list of tensors unpacked from `elems` on dimension 0.

  This method works similar to `tf.map_fn` but is optimized to run much faster,
  possibly with a much larger memory footprint. The speedups are obtained by
  vectorization (see [Auto-Vectorizing TensorFlow Graphs: Jacobians,
  Auto-Batching and Beyond](https://arxiv.org/pdf/1903.04243.pdf)). The idea
  behind vectorization is to semantically launch all the invocations of `fn` in
  parallel and fuse corresponding operations across all these invocations. This
  fusion is done statically at graph generation time and the generated code is
  often similar in performance to a manually fused version.

  Because `tf.vectorized_map` fully parallelizes the batch, this method will
  generally be significantly faster than using `tf.map_fn`, especially in eager
  mode. However, this is an experimental feature and currently has a lot of
  limitations:
    - There should be no data dependency between the different semantic
      invocations of `fn`, i.e. it should be safe to map the elements of the
      inputs in any order.
    - Stateful kernels may mostly not be supported since these often imply a
      data dependency. We do support a limited set of such stateful kernels
      though (like RandomFoo, Variable operations like reads, etc).
    - `fn` has limited support for control flow operations.
    - `fn` should return nested structure of Tensors or Operations. However
      if an Operation is returned, it should have zero outputs.
    - The shape and dtype of any intermediate or output tensors in the
      computation of `fn` should not depend on the input to `fn`.

  Examples:
  ```python
  def outer_product(a):
    return tf.tensordot(a, a, 0)

  batch_size = 100
  a = tf.ones((batch_size, 32, 32))
  c = tf.vectorized_map(outer_product, a)
  assert c.shape == (batch_size, 32, 32, 32, 32)
  ```

  ```python
  # Computing per-example gradients

  batch_size = 10
  num_features = 32
  layer = tf.keras.layers.Dense(1)

  def model_fn(arg):
    with tf.GradientTape() as g:
      inp, label = arg
      inp = tf.expand_dims(inp, 0)
      label = tf.expand_dims(label, 0)
      prediction = layer(inp)
      loss = tf.nn.l2_loss(label - prediction)
    return g.gradient(loss, (layer.kernel, layer.bias))

  inputs = tf.random.uniform([batch_size, num_features])
  labels = tf.random.uniform([batch_size, 1])
  per_example_gradients = tf.vectorized_map(model_fn, (inputs, labels))
  assert per_example_gradients[0].shape == (batch_size, num_features, 1)
  assert per_example_gradients[1].shape == (batch_size, 1)
  ```

  Args:
    fn: The callable to be performed. It accepts one argument, which will have
      the same (possibly nested) structure as `elems`, and returns a possibly
      nested structure of Tensors and Operations, which may be different than
      the structure of `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which will
      be unpacked along their first dimension. The nested sequence of the
      resulting slices will be mapped over by `fn`. The first dimensions of all
      elements must broadcast to a consistent value; equivalently, each
      element tensor must have first dimension of either `B` or `1`, for some
      common batch size `B >= 1`.
    fallback_to_while_loop: If true, on failing to vectorize an operation,
      the unsupported op is wrapped in a tf.while_loop to execute the map
      iterations. Note that this fallback only happens for unsupported ops and
      other parts of `fn` are still vectorized. If false, on encountering an
      unsupported op, a ValueError is thrown. Note that the fallbacks can result
      in slowdowns since vectorization often yields speedup of one to two orders
      of magnitude.

  Returns:
    A tensor or (possibly nested) sequence of tensors. Each tensor packs the
    results of applying fn to tensors unpacked from elems along the first
    dimension, from first to last.

    Although they are less common as user-visible inputs and outputs, note that
    tensors of type `tf.variant` which represent tensor lists (for example from
    `tf.raw_ops.TensorListFromTensor`) are vectorized by stacking the list
    contents rather than the variant itself, and so the container tensor will
    have a scalar shape when returned rather than the usual stacked shape. This
    improves the performance of control flow gradient vectorization.

  Raises:
    ValueError: If vectorization fails and fallback_to_while_loop is False.
  """
  elems = nest.map_structure(ops.convert_to_tensor,
                             elems,
                             expand_composites=True)

  def loop_fn(i):
    gathered_elems = nest.map_structure(
        lambda x: _gather_from_tensor_or_composite(x, i), elems)
    return fn(gathered_elems)

  # Extract batch size from the maximum first dimension of any element.
  flat_elems = nest.flatten(
      nest.map_structure(
          functools.partial(_composite_to_tensors,
                            is_batched=True),
          elems))
  def _get_shape(x):
    if x.shape.rank is None:
      return None
    return x.shape.as_list()[0]
  static_first_dims = [_get_shape(elem) for elem in flat_elems]
  if any(s is None for s in static_first_dims):
    batch_size = math_ops.reduce_max(
        [array_ops.shape(elem)[0] for elem in flat_elems])
  else:
    batch_size = max(static_first_dims)

  return pfor(loop_fn, batch_size,
              fallback_to_while_loop=fallback_to_while_loop)
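
A short usage sketch of the first-dimension broadcasting described under `elems` above, assuming a TF 2.x version of `tf.vectorized_map` that supports it: one input carries batch dimension `B`, the other carries batch dimension 1 and is broadcast against `B`.

```python
import tensorflow as tf

a = tf.ones((8, 3))  # first dimension B = 8
b = tf.ones((1, 3))  # first dimension 1, broadcast across the batch

def add_pair(args):
  x, y = args        # each slice has shape (3,)
  return x + y

out = tf.vectorized_map(add_pair, (a, b))
assert out.shape == (8, 3)
```
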
Example No. 52
0
def static_rnn(cell,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

  The simplest form of RNN network generated is:

  ```python
    state = cell.zero_state(...)
    outputs = []
    for input_ in inputs:
      output, state = cell(input_, state)
      outputs.append(output)
    return (outputs, state)
  ```
  However, a few other options are available:

  An initial state can be provided.
  If the sequence_length vector is provided, dynamic calculation is performed.
  This method of calculation does not compute the RNN steps past the maximum
  sequence length of the minibatch (thus saving computational time),
  and properly propagates the state at an example's sequence length
  to the final state output.

  The dynamic calculation performed is, at time `t` for batch row `b`,

  ```python
    (output, state)(b, t) =
      (t >= sequence_length(b))
        ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
        : cell(input(b, t), state(b, t - 1))
  ```

  Args:
    cell: An instance of RNNCell.
    inputs: A length T list of inputs, each a `Tensor` of shape
      `[batch_size, input_size]`, or a nested tuple of such elements.
    initial_state: (optional) An initial state for the RNN.
      If `cell.state_size` is an integer, this must be
      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
      If `cell.state_size` is a tuple, this should be a tuple of
      tensors having shapes `[batch_size, s] for s in cell.state_size`.
    dtype: (optional) The data type for the initial state and expected output.
      Required if initial_state is not provided or RNN state has a heterogeneous
      dtype.
    sequence_length: Specifies the length of each sequence in inputs.
      An int32 or int64 vector (tensor) of size `[batch_size]`, values in `[0, T)`.
    scope: VariableScope for the created subgraph; defaults to "rnn".

  Returns:
    A pair (outputs, state) where:

    - outputs is a length T list of outputs (one for each input), or a nested
      tuple of such elements.
    - state is the final state

  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If `inputs` is `None` or an empty list, or if the input depth
      (column size) cannot be inferred from inputs via shape inference.
  """

    if not isinstance(cell, core_rnn_cell.RNNCell):
        raise TypeError("cell must be an instance of RNNCell")
    if not nest.is_sequence(inputs):
        raise TypeError("inputs must be a sequence")
    if not inputs:
        raise ValueError("inputs must not be empty")

    outputs = []
    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "rnn") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        # Obtain the first sequence of the input
        first_input = inputs
        while nest.is_sequence(first_input):
            first_input = first_input[0]

        # Temporarily avoid EmbeddingWrapper and seq2seq badness
        # TODO(lukaszkaiser): remove EmbeddingWrapper
        if first_input.get_shape().ndims != 1:

            input_shape = first_input.get_shape().with_rank_at_least(2)
            fixed_batch_size = input_shape[0]

            flat_inputs = nest.flatten(inputs)
            for flat_input in flat_inputs:
                input_shape = flat_input.get_shape().with_rank_at_least(2)
                batch_size, input_size = input_shape[0], input_shape[1:]
                fixed_batch_size.merge_with(batch_size)
                for i, size in enumerate(input_size):
                    if size.value is None:
                        raise ValueError(
                            "Input size (dimension %d of inputs) must be accessible via "
                            "shape inference, but saw value None." % i)
        else:
            fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0]

        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(first_input)[0]
        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError("If no initial_state is provided, "
                                 "dtype must be specified")
            state = cell.zero_state(batch_size, dtype)

        if sequence_length is not None:  # Prepare variables
            sequence_length = ops.convert_to_tensor(sequence_length,
                                                    name="sequence_length")
            if sequence_length.get_shape().ndims not in (None, 1):
                raise ValueError(
                    "sequence_length must be a vector of length batch_size")

            def _create_zero_output(output_size):
                # convert int to TensorShape if necessary
                size = _state_size_with_prefix(output_size,
                                               prefix=[batch_size])
                output = array_ops.zeros(array_ops.stack(size),
                                         _infer_state_dtype(dtype, state))
                shape = _state_size_with_prefix(
                    output_size, prefix=[fixed_batch_size.value])
                output.set_shape(tensor_shape.TensorShape(shape))
                return output

            output_size = cell.output_size
            flat_output_size = nest.flatten(output_size)
            flat_zero_output = tuple(
                _create_zero_output(size) for size in flat_output_size)
            zero_output = nest.pack_sequence_as(structure=output_size,
                                                flat_sequence=flat_zero_output)

            sequence_length = math_ops.to_int32(sequence_length)
            min_sequence_length = math_ops.reduce_min(sequence_length)
            max_sequence_length = math_ops.reduce_max(sequence_length)

        for time, input_ in enumerate(inputs):
            if time > 0: varscope.reuse_variables()
            # pylint: disable=cell-var-from-loop
            call_cell = lambda: cell(input_, state)
            # pylint: enable=cell-var-from-loop
            if sequence_length is not None:
                (output,
                 state) = _rnn_step(time=time,
                                    sequence_length=sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    max_sequence_length=max_sequence_length,
                                    zero_output=zero_output,
                                    state=state,
                                    call_cell=call_cell,
                                    state_size=cell.state_size)
            else:
                (output, state) = call_cell()

            outputs.append(output)

        return (outputs, state)
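
A minimal usage sketch, assuming graph mode and the equivalent `tf.compat.v1.nn.static_rnn` API: a `[batch, time, depth]` tensor is unstacked into the length-`T` list that `static_rnn` expects, and `sequence_length` enables the dynamic early-stop calculation described above.

```python
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

batch_size, max_time, depth, units = 4, 5, 8, 16
x = tf.placeholder(tf.float32, [batch_size, max_time, depth])
seq_len = tf.placeholder(tf.int32, [batch_size])

cell = tf.nn.rnn_cell.BasicRNNCell(units)
inputs = tf.unstack(x, axis=1)  # length-T list of [batch, depth] tensors
outputs, state = tf.nn.static_rnn(
    cell, inputs, dtype=tf.float32, sequence_length=seq_len)
```
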
Example No. 53
0
def normalize(x):
    """Project `x` into the range [0, 1]"""
    return (x - math_ops.reduce_min(x)) / (math_ops.reduce_max(x) -
                                           math_ops.reduce_min(x))
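
One caveat worth noting: `normalize` divides by the observed range, so a constant input produces 0/0 (NaN). A hedged variant with a zero-range guard, written against the public TF 2.x ops:

```python
import tensorflow as tf

def safe_normalize(x, eps=1e-12):
  """Like `normalize` above, but guards against a zero range."""
  lo = tf.reduce_min(x)
  hi = tf.reduce_max(x)
  return (x - lo) / tf.maximum(hi - lo, eps)

print(safe_normalize(tf.constant([1., 2., 4.])))  # [0.0, 0.3333, 1.0]
print(safe_normalize(tf.constant([3., 3.])))      # [0.0, 0.0], not NaN
```
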
Example No. 54
0
  def all_gather(self,
                 input_tensor,
                 axis,
                 communication_hint='AUTO',
                 timeout=0):
    """All-gather a dense tensor.

    This method must be called inside a tf.function.

    Args:
      input_tensor: a dense tensor. It must have the same rank on all replicas,
        and dimensions other than `axis` need to be the same as well.
      axis: 0-D int32 Tensor. Dimension along which to gather. Must be in the
        range [0, rank(value)).
      communication_hint: string providing hint to runtime for choosing
        collective implementation. Available options are `AUTO`, `NCCL`, and
        `RING`.
      timeout: a float. The timeout in seconds.

    Returns:
      The gathered Tensor.

    Raises:
      RuntimeError: if called in eager mode.
    """
    if context.executing_eagerly():
      raise RuntimeError('all_gather in eager mode is not supported')

    with ops.device(self._device), \
         ops.control_dependencies([array_ops.identity(input_tensor)]):
      # 1. Transpose
      # E.g. given an input_tensor of shape [2,2,5,1] and axis=3 to gather,
      # we use perm_pre=[3, 0, 1, 2] to transpose it to [1,2,2,5], which
      # brings the gather dimension first; afterwards perm_after=[1,2,3,0]
      # places it back.
      perm_pre = array_ops.concat(
          ([axis], math_ops.range(axis),
           math_ops.range(axis + 1, array_ops.rank(input_tensor))),
          axis=0)
      input_tensor_t = array_ops.transpose(input_tensor, perm=perm_pre)
      # 2. Pad
      gathered_shape = self._all_gather(
          array_ops.expand_dims_v2(array_ops.shape_v2(input_tensor_t), axis=0),
          communication_hint,
          timeout=timeout)
      first_dims = gathered_shape[:, 0]
      full_axis_dim = math_ops.reduce_max(first_dims)
      padded_input_tensor = _pad_util(input_tensor_t, full_axis_dim)

      # 3. Gather
      gather_padded_out_tensor = self._all_gather(
          padded_input_tensor, communication_hint, timeout=timeout)
      # 4. Unpad
      split_tensors = []
      for i in range(self._group_size):
        start_pos = i * full_axis_dim
        split_tensors.append(gather_padded_out_tensor[start_pos:start_pos +
                                                      first_dims[i]])
      out_tensor_t = array_ops.concat(split_tensors, 0)

      # 5. Transpose back
      perm_after = array_ops.concat(
          (math_ops.range(1, axis + 1), [0],
           math_ops.range(axis + 1, array_ops.rank(input_tensor_t))),
          axis=0)
      return array_ops.transpose(out_tensor_t, perm=perm_after)
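
The shape-exchange/pad/unpad sequence above can be illustrated without a collective runtime. A hedged simulation of two "replicas" whose tensors differ along the gather axis, with `tf.concat` standing in for `_all_gather`:

```python
import tensorflow as tf

t0 = tf.ones((2, 3))  # replica 0's tensor
t1 = tf.ones((5, 3))  # replica 1's tensor, larger along axis 0

# Exchange first dimensions and find the largest one (step 2 above).
first_dims = tf.stack([tf.shape(t0)[0], tf.shape(t1)[0]])
full_axis_dim = tf.reduce_max(first_dims)

def pad_to(t, n):  # mimics _pad_util: zero-pad axis 0 up to n rows
  return tf.pad(t, [[0, n - tf.shape(t)[0]], [0, 0]])

# Gather the padded tensors (step 3), then slice the padding back out
# using the exchanged lengths (step 4).
gathered = tf.concat([pad_to(t0, full_axis_dim), pad_to(t1, full_axis_dim)], 0)
parts = [gathered[i * full_axis_dim:i * full_axis_dim + first_dims[i]]
         for i in range(2)]
out = tf.concat(parts, 0)
assert out.shape[0] == 7  # 2 + 5 rows survive the unpad
```
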
Example No. 55
0
def indicators_to_sparse_ids(indicators,
                             ignore_value=None,
                             dtype=dtypes.int64):
    """Convert a dense indicator tensor to sparse IDs.

  This is commonly used for converting a dense classification label to sparse.
  In the following example, we have an input of shape (2, 2, num_classes),
  where num_classes=4.

  ```python
  indicators = [
    [
      [0, 0, 1, 0],
      [0, 0, 0, 0]
    ], [
      [1, 0, 1, 1],
      [0, 0, 1, 0]
    ]
  ]
  sparse_ids = indicators_to_sparse_ids(indicators)
  ```

  `sparse_ids` in "jagged" format:
  [
    [
      [2],
      []
    ], [
      [0, 2, 3],
      [2]
    ]
  ]

  `sparse_ids` in `SparseTensor` format:
  ```python
  {
    indices: [[0, 0, 1], [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 1, 0]],
    values: [2, 0, 2, 3, 2],
    dense_shape: [2, 2, 3]
  }
  ```

  Args:
    indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`.
      `ignore_value` values are ignored. For other values (typically, ones), the
      index along the last dimension is returned.
    ignore_value: Entries in `indicators` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, default value of
      `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`).
    dtype: Type of result, must be integer type.

  Returns:
    `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`,
      where `max_num_labels` is the maximum number of non-zero values in any
      row (in the example above, row (1, 1) has 3 non-zero values, so the result
      shape is (2, 2, 3)). The values of this `SparseTensor` are in the range
      `[0, num_classes)` and correspond to the index of non-ignore values along
      the last dimension of `indicators`.

  Raises:
    ValueError: if `dtype` is not integer.
  """
    if not dtype.is_integer:
        raise ValueError("Invalid dtype {} not integer.".format(dtype))
    with ops.name_scope(None, "indicators_to_sparse_ids",
                        (indicators, ignore_value)):
        # Convert indicators to binary ones and zeros. We use int64 since
        # SparseTensor requires int64 indices.
        indicators = ops.convert_to_tensor(indicators, name="indicators")
        missing_indicators = math_ops.equal(indicators,
                                            _ignore_value_tensor(
                                                indicators.dtype,
                                                ignore_value),
                                            name="missing")
        zeros_like_indicators = array_ops.zeros_like(indicators,
                                                     dtype=dtypes.int64,
                                                     name="zeros")
        binary_indicators = array_ops.where(missing_indicators,
                                            zeros_like_indicators,
                                            array_ops.ones_like(
                                                indicators,
                                                dtype=dtypes.int64,
                                                name="ones"),
                                            name="binary_indicators")

        # Use cumsum along the last dimension to generate per-row indexes.
        # Note that these are 1-based (since 0 indicates missing values), so they're
        # off-by-1 from the actual indices. We'll subtract 1 below. Since they're
        # off-by-one, the max value is the size of the last dimension (i.e.,
        # last_index + 1).
        row_index_indicators = array_ops.where(
            missing_indicators, zeros_like_indicators,
            math_ops.cumsum(binary_indicators, axis=-1),
            "row_index_indicators")
        result_last_dim = array_ops.reshape(
            math_ops.reduce_max(row_index_indicators),
            shape=(1, ),
            name="result_last_dim")

        # Convert to a SparseTensor. The values of this SparseTensor are the last
        # indices of our result, and the last indices of this SparseTensor (i.e.,
        # the class IDs indicated by `indicators`) are the values of our result, so
        # we use tensor slicing and concat to swap them.
        sparse_row_index_indicators = dense_to_sparse_tensor(
            row_index_indicators, ignore_value=0)
        return sparse_tensor.SparseTensor(
            indices=array_ops.concat(
                (sparse_row_index_indicators.indices[:, :-1],
                 array_ops.reshape(sparse_row_index_indicators.values - 1,
                                   (-1, 1))),
                axis=1,
                name="indices"),
            values=math_ops.cast(sparse_row_index_indicators.indices[:, -1],
                                 dtype=dtype,
                                 name="values"),
            dense_shape=array_ops.concat(
                (sparse_row_index_indicators.dense_shape[0:-1],
                 result_last_dim),
                axis=0,
                name="dense_shape"))
Example No. 56
0
    def build(self, input_shape):
        """Builds the layer.

    Creates the variables for the network modeling the densities, creates the
    auxiliary loss estimating the median and tail quantiles of the densities,
    and then uses that to create the probability mass functions and the update
    op that produces the discrete cumulative density functions used by the range
    coder.

    Args:
      input_shape: Shape of the input tensor, used to get the number of
        channels.

    Raises:
      ValueError: if `input_shape` doesn't specify the length of the channel
        dimension.
    """
        input_shape = tensor_shape.TensorShape(input_shape)
        channel_axis = self._channel_axis(input_shape.ndims)
        channels = input_shape[channel_axis].value
        if channels is None:
            raise ValueError(
                "The channel dimension of the inputs must be defined.")
        self.input_spec = engine.InputSpec(ndim=input_shape.ndims,
                                           axes={channel_axis: channels})
        filters = (1, ) + self.filters + (1, )
        scale = self.init_scale**(1 / (len(self.filters) + 1))

        # Create variables.
        self._matrices = []
        self._biases = []
        self._factors = []
        for i in range(len(self.filters) + 1):
            init = np.log(np.expm1(1 / scale / filters[i + 1]))
            matrix = self.add_variable("matrix_{}".format(i),
                                       dtype=self.dtype,
                                       shape=(channels, filters[i + 1],
                                              filters[i]),
                                       initializer=init_ops.Constant(init))
            matrix = nn.softplus(matrix)
            self._matrices.append(matrix)

            bias = self.add_variable("bias_{}".format(i),
                                     dtype=self.dtype,
                                     shape=(channels, filters[i + 1], 1),
                                     initializer=init_ops.RandomUniform(
                                         -.5, .5))
            self._biases.append(bias)

            if i < len(self.filters):
                factor = self.add_variable("factor_{}".format(i),
                                           dtype=self.dtype,
                                           shape=(channels, filters[i + 1], 1),
                                           initializer=init_ops.Zeros())
                factor = math_ops.tanh(factor)
                self._factors.append(factor)

        # To figure out what range of the densities to sample, we need to compute
        # the quantiles given by `tail_mass / 2` and `1 - tail_mass / 2`. Since we
        # can't take inverses of the cumulative directly, we make it an optimization
        # problem:
        # `quantiles = argmin(|logit(cumulative) - target|)`
        # where `target` is `logit(tail_mass / 2)` or `logit(1 - tail_mass / 2)`.
        # Taking the logit (inverse of sigmoid) of the cumulative makes the
        # representation of the right target more numerically stable.

        # Numerically stable way of computing logits of `tail_mass / 2`
        # and `1 - tail_mass / 2`.
        target = np.log(2 / self.tail_mass - 1)
        # Compute lower and upper tail quantile as well as median.
        target = constant_op.constant([-target, 0, target], dtype=self.dtype)

        def quantiles_initializer(shape, dtype=None, partition_info=None):
            del partition_info  # unused
            assert tuple(shape[1:]) == (1, 3)
            init = constant_op.constant(
                [[[-self.init_scale, 0, self.init_scale]]], dtype=dtype)
            return array_ops.tile(init, (shape[0], 1, 1))

        quantiles = self.add_variable("quantiles",
                                      shape=(channels, 1, 3),
                                      dtype=self.dtype,
                                      initializer=quantiles_initializer)
        logits = self._logits_cumulative(quantiles, stop_gradient=True)
        loss = math_ops.reduce_sum(abs(logits - target))
        self.add_loss(loss, inputs=None)

        # Save medians for `call`, `compress`, and `decompress`.
        self._medians = quantiles[:, :, 1:2]
        if not self.optimize_integer_offset:
            self._medians = math_ops.round(self._medians)

        # Largest distance observed between lower tail quantile and median,
        # or between median and upper tail quantile.
        minima = math_ops.reduce_max(self._medians - quantiles[:, :, 0:1])
        maxima = math_ops.reduce_max(quantiles[:, :, 2:3] - self._medians)
        minmax = math_ops.maximum(minima, maxima)
        minmax = math_ops.ceil(minmax)
        minmax = math_ops.maximum(minmax, 1)

        # Sample the density up to `minmax` around the median.
        samples = math_ops.range(-minmax, minmax + 1, dtype=self.dtype)
        samples += self._medians

        half = constant_op.constant(.5, dtype=self.dtype)
        # We strip the sigmoid from the end here, so we can use the special rule
        # below to only compute differences in the left tail of the sigmoid.
        # This increases numerical stability (see explanation in `call`).
        lower = self._logits_cumulative(samples - half, stop_gradient=True)
        upper = self._logits_cumulative(samples + half, stop_gradient=True)
        # Flip signs if we can move more towards the left tail of the sigmoid.
        sign = -math_ops.sign(math_ops.add_n([lower, upper]))
        pmf = abs(
            math_ops.sigmoid(sign * upper) - math_ops.sigmoid(sign * lower))
        # Add tail masses to first and last bin of pmf, as we clip values for
        # compression, meaning that out-of-range values get mapped to these bins.
        pmf = array_ops.concat([
            math_ops.add_n([pmf[:, 0, :1],
                            math_ops.sigmoid(lower[:, 0, :1])]),
            pmf[:, 0, 1:-1],
            math_ops.add_n(
                [pmf[:, 0, -1:],
                 math_ops.sigmoid(-upper[:, 0, -1:])]),
        ],
                               axis=-1)
        self._pmf = pmf

        cdf = coder_ops.pmf_to_quantized_cdf(
            pmf, precision=self.range_coder_precision)

        def cdf_getter(*args, **kwargs):
            del args, kwargs  # ignored
            return variable_scope.get_variable("quantized_cdf",
                                               dtype=dtypes.int32,
                                               initializer=cdf,
                                               trainable=False,
                                               validate_shape=False,
                                               collections=())

        # Need to provide a fake shape here since add_variable insists on it.
        self._quantized_cdf = self.add_variable("quantized_cdf",
                                                shape=(channels, 1),
                                                dtype=dtypes.int32,
                                                getter=cdf_getter,
                                                trainable=False)

        update_op = state_ops.assign(self._quantized_cdf,
                                     cdf,
                                     validate_shape=False)
        self.add_update(update_op, inputs=None)

        super(EntropyBottleneck, self).build(input_shape)
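
The `target` above follows from a logit identity: for `p = 1 - tail_mass / 2`, `logit(p) = log(p / (1 - p)) = log(2 / tail_mass - 1)`, and the lower tail target `logit(tail_mass / 2)` is its negation. A quick numeric check:

```python
import numpy as np

tail_mass = 1e-2
target = np.log(2 / tail_mass - 1)
p_hi = 1 - tail_mass / 2  # upper tail quantile
p_lo = tail_mass / 2      # lower tail quantile
assert np.isclose(np.log(p_hi / (1 - p_hi)), target)
assert np.isclose(np.log(p_lo / (1 - p_lo)), -target)
```
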
Example No. 57
0
def lifted_struct_loss(labels, embeddings, margin=1.0):
    """Computes the lifted structured loss.

  The loss encourages the positive distances (between a pair of embeddings
  with the same labels) to be smaller than any negative distances (between a
  pair of embeddings with different labels) in the mini-batch in a way
  that is differentiable with respect to the embedding vectors.
  See: https://arxiv.org/abs/1511.06452.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    lifted_loss: tf.float32 scalar.
  """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pairwise_distances = pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    diff = margin - pairwise_distances
    mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    # Safe maximum: temporarily shift negative distances above zero before
    # taking the max, so the max is computed only among negative pairs.
    row_minimums = math_ops.reduce_min(diff, 1, keep_dims=True)
    row_negative_maximums = math_ops.reduce_max(
        math_ops.multiply(diff - row_minimums,
                          mask), 1, keep_dims=True) + row_minimums

    # Compute the loss.
    # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
    #   where m_i is the max of alpha - negative D_i's.
    # This matches the Caffe loss layer implementation at:
    #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

    max_elements = math_ops.maximum(row_negative_maximums,
                                    array_ops.transpose(row_negative_maximums))
    diff_tiled = array_ops.tile(diff, [batch_size, 1])
    mask_tiled = array_ops.tile(mask, [batch_size, 1])
    max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements),
                                          [-1, 1])

    loss_exp_left = array_ops.reshape(
        math_ops.reduce_sum(math_ops.multiply(
            math_ops.exp(diff_tiled - max_elements_vect), mask_tiled),
                            1,
                            keep_dims=True), [batch_size, batch_size])

    loss_mat = max_elements + math_ops.log(loss_exp_left +
                                           array_ops.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = math_ops.reduce_sum(mask_positives) / 2.0

    lifted_loss = math_ops.truediv(0.25 * math_ops.reduce_sum(
        math_ops.square(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0))),
                                   num_positives,
                                   name='liftedstruct_loss')
    return lifted_loss
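
The "safe maximum" above deserves a standalone look: with a multiplicative mask, the zeros at masked-out positions would win the row max whenever all negative-pair entries are below zero, so each row is shifted by its minimum first. A hedged NumPy illustration:

```python
import numpy as np

diff = np.array([[-3.0, -1.0, -2.0]])  # margin - distances for one row
mask = np.array([[1.0, 0.0, 1.0]])     # 1 marks a negative pair

naive = (diff * mask).max(axis=1)          # 0.0, leaked in from the mask
row_min = diff.min(axis=1, keepdims=True)  # -3.0
safe = ((diff - row_min) * mask).max(axis=1) + row_min[:, 0]
print(naive, safe)  # [0.] vs. [-2.], the true max among negatives
```
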
Example No. 58
0
def categorical_hinge(y_true, y_pred):
    pos = math_ops.reduce_sum(y_true * y_pred, axis=-1)
    neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1)
    return math_ops.maximum(0., neg - pos + 1.)
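
A worked example, assuming one-hot `y_true`: with `y_true = [0, 1, 0]` and `y_pred = [0.1, 0.7, 0.2]`, `pos = 0.7` and `neg = max(0.1, 0.2) = 0.2`, so the loss is `max(0, 0.2 - 0.7 + 1) = 0.5`.

```python
import tensorflow as tf

y_true = tf.constant([[0., 1., 0.]])
y_pred = tf.constant([[0.1, 0.7, 0.2]])
pos = tf.reduce_sum(y_true * y_pred, axis=-1)         # 0.7
neg = tf.reduce_max((1. - y_true) * y_pred, axis=-1)  # 0.2
loss = tf.maximum(0., neg - pos + 1.)                 # 0.5
```
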
Example No. 59
0
  def all_reduce_indexed_slices(self,
                                input_slices,
                                communication_hint='AUTO',
                                timeout=0):
    """All-reduce an IndexedSlices.

    This method must be called inside a tf.function.

    Args:
      input_slices: an IndexedSlices.
      communication_hint: string providing hint to runtime for choosing
        collective implementation.
      timeout: a float. The timeout in seconds.

    Returns:
      The reduced IndexedSlices.

    Raises:
      RuntimeError: if called in eager mode.
    """
    if context.executing_eagerly():
      raise RuntimeError(
          'all_reduce_indexed_slices in eager mode is not supported')

    # Current CollectiveAllGather implementations require input IndexedSlices to
    # have a consistent length across the board, so we handle the reduction of
    # IndexedSlices as follows:
    #   1. Gather the lengths of IndexedSlices from all participants.
    #   2. If they have consistent length, apply all_gather.
    #   3. Otherwise convert IndexedSlices to dense tensors and apply
    #      all_reduce.
    with ops.device(self._device):

      def all_gather():
        """Use all_gather to aggregate `IndexedSlices`."""
        all_values = self._all_gather(
            input_slices.values, communication_hint, timeout=timeout)
        # Add control dependency to order the all-gather.
        control = [all_values] if communication_hint == 'NCCL' else []
        with ops.control_dependencies(control):
          all_indices = self._all_gather(
              input_slices.indices, communication_hint, timeout=timeout)
        return ops.IndexedSlices(
            values=all_values,
            indices=all_indices,
            dense_shape=input_slices.dense_shape)

      def densify_and_all_reduce():
        """Use all_reduce to aggregate `IndexedSlices`."""
        densified = ops.convert_to_tensor(input_slices)
        reduced = self.all_reduce(
            densified, communication_hint=communication_hint, timeout=timeout)
        # We have to convert dense grad to IndexedSlice because all_reduce()
        # and all_gather() must have the same return type as required by
        # control_flow_ops.cond.
        return ops.IndexedSlices(
            values=reduced,
            indices=math_ops.range(array_ops.shape(reduced)[0]),
            dense_shape=input_slices.dense_shape)

      length = array_ops.shape(input_slices.indices)
      all_lengths = self._all_gather(
          length, communication_hint, timeout=timeout)
      return control_flow_ops.cond(
          math_ops.equal(
              math_ops.reduce_max(all_lengths),
              math_ops.reduce_min(all_lengths)), all_gather,
          densify_and_all_reduce)
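
The dense fallback branch re-wraps its result so both `tf.cond` branches return the same type. A hedged sketch of that densify-and-rewrap step on a hypothetical `IndexedSlices`, using public TF 2.x ops:

```python
import tensorflow as tf

# Hypothetical IndexedSlices: rows 0 and 2 of a [4, 2] gradient.
slices = tf.IndexedSlices(
    values=tf.constant([[1., 1.], [2., 2.]]),
    indices=tf.constant([0, 2]),
    dense_shape=tf.constant([4, 2]))

dense = tf.convert_to_tensor(slices)  # densify (fills missing rows with 0)
# After a dense all-reduce every row is present, so the result is re-wrapped
# with indices covering the full range to keep the return type uniform.
rewrapped = tf.IndexedSlices(
    values=dense,
    indices=tf.range(tf.shape(dense)[0]),
    dense_shape=slices.dense_shape)
```
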
Example No. 60
0
def step_fn(data):
    assert_op = control_flow_ops.Assert(
        math_ops.less_equal(math_ops.reduce_max(data), 100.), [data])
    with ops.control_dependencies([assert_op]):
        return math_ops.square(data)
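
A brief usage note for `step_fn` above, assuming eager execution and the module imports used in the snippet: in-range data returns its elementwise square, while any element above 100 trips the assertion.

```python
import tensorflow as tf

print(step_fn(tf.constant([2.0, 10.0])))  # -> [4.0, 100.0]
# step_fn(tf.constant([101.0]))  # raises InvalidArgumentError from the Assert
```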