Пример #1
0
    def replace(self, episodes, length, rows=None):
        """Overwrite whole episodes in the buffers.

        Args:
          episodes: Tuple of transition quantities with batch and time
            dimensions; paired element-wise with `self._buffers`.
          length: Batch of sequence lengths.
          rows: Episodes to replace, defaults to all.

        Returns:
          The scatter-update op on the stored lengths, which runs only after
          the capacity/length assertions and every buffer update.
        """
        if rows is None:
            rows = tf.range(self._capacity)
        assert rows.shape.ndims == 1

        capacity_check = tf.assert_less(
            rows, self._capacity, message='capacity exceeded')
        # The length check is chained behind the capacity check.
        with tf.control_dependencies([capacity_check]):
            length_check = tf.assert_less_equal(
                length, self._max_length, message='max length exceeded')

        with tf.control_dependencies([length_check]):
            buffer_updates = [
                tf.scatter_update(target, rows, values)
                for target, values in zip(self._buffers, episodes)
            ]

        # Lengths are written last so they never describe stale buffer rows.
        with tf.control_dependencies(buffer_updates):
            return tf.scatter_update(self._length, rows, length)
Пример #2
0
def position_embeddings_layer(input_shape,
                              position_embedding_name="position_embeddings",
                              initializer_range=0.02,
                              max_position_embeddings=512):
    """Build position embeddings shaped to broadcast against the input.

    Args:
      input_shape: Sequence of dimension sizes; index 1 is used as the
        sequence length and index 2 as the embedding width.
      position_embedding_name: Name passed to `create_embedding` for the table.
      initializer_range: Initializer range passed to `create_embedding`.
      max_position_embeddings: Number of rows in the position table; the
        sequence length is asserted to be no larger than this.

    Returns:
      The first `seq_length` rows of the position table, reshaped to
      `[1, ..., 1, seq_length, width]` with one leading 1 for every input
      dimension beyond the last two.
    """
    seq_length, width = input_shape[1], input_shape[2]

    length_check = tf.assert_less_equal(seq_length, max_position_embeddings)

    # control_dependencies is TensorFlow's mechanism for ordering ops.
    # Here the assert op runs before anything in the block below; it checks
    # that the input length does not exceed the maximum supported length.
    with tf.control_dependencies([length_check]):
        embedding_table = create_embedding(
            shape=[max_position_embeddings, width],
            embedding_name=position_embedding_name,
            initializer_range=initializer_range)
        # Take the needed embeddings directly with a slice.
        sliced = tf.slice(embedding_table, [0, 0], [seq_length, -1])
        leading_ones = [1] * (len(input_shape) - 2)
        broadcast_shape = leading_ones + [seq_length, width]
        return tf.reshape(sliced, broadcast_shape)
Пример #3
0
def remidify(pitches):
  """Shift pitch indices in [0, 88) up to MIDI pitches [21, 108].

  The input is asserted to lie in [0, 87] before 21 is added.
  """
  not_below_range = tf.assert_greater_equal(pitches, 0)
  not_above_range = tf.assert_less_equal(pitches, 87)
  with tf.control_dependencies([not_below_range, not_above_range]):
    midi_pitches = pitches + 21
  return midi_pitches
Пример #4
0
def demidify(pitches):
  """Shift MIDI pitches [21, 108] down to pitch indices [0, 88).

  The input is asserted to lie in [21, 108] before 21 is subtracted.
  """
  not_below_range = tf.assert_greater_equal(pitches, 21)
  not_above_range = tf.assert_less_equal(pitches, 108)
  with tf.control_dependencies([not_below_range, not_above_range]):
    pitch_indices = pitches - 21
  return pitch_indices
Пример #5
0
def assert_less_equal(*args, **kwargs):
    """Create `tf.assert_less_equal` pinned to the CPU.

    All arguments are forwarded unchanged. The op is built under
    `tf.device("/CPU:0")` so the assert always goes on CPU; the unwrapped
    version raises an exception if used with tf.device("/GPU:x").
    """
    cpu_placement = tf.device("/CPU:0")
    with cpu_placement:
        assertion = tf.assert_less_equal(*args, **kwargs)
    return assertion
Пример #6
0
def maybe_split_sequence_lengths(sequence_length, num_splits, total_length):
    """Validate `sequence_length` and split it per segment when it is a vector.

    The returned tensor must be used in the graph for the runtime validations
    to execute, since they are attached to it as control dependencies.

    Args:
      sequence_length: A batch of sequence lengths, either sized `[batch_size]`
        with every value equal to 0 or `total_length`, or sized
        `[batch_size, num_splits]`.
      num_splits: The scalar number of splits of the full sequences.
      total_length: The scalar total sequence length (potentially padded).

    Returns:
      For a rank-2 input, an identity of the input with its static shape set
      to `[batch_size, num_splits]`. For a rank-1 input, a
      `[batch_size, num_splits]` tensor holding each length floor-divided by
      `num_splits`.

    Raises:
      ValueError: If `sequence_length` is neither rank 1 nor rank 2, or if it
        is rank 1 and `total_length` is not divisible by `num_splits`.
      tf.errors.InvalidArgumentError: At run time, if a rank-1 input contains
        a value other than 0 or `total_length`, or a rank-2 input contains a
        value above `total_length // num_splits`.
    """
    rank = sequence_length.shape.ndims
    if rank not in (1, 2):
        raise ValueError(
            'Sequence lengths must be given as a vector or a 2D Tensor whose '
            'second dimension size matches its initial hierarchical split. Got '
            'shape: %s' % sequence_length.shape.as_list())

    if rank == 2:
        # Already per-segment: only bound-check and pin the static shape.
        max_segment_length = tf.constant(total_length // num_splits, tf.int32)
        within_segment = tf.assert_less_equal(
            sequence_length,
            max_segment_length,
            message='Segment length cannot be more than '
            '`total_length / num_splits`.')
        with tf.control_dependencies([within_segment]):
            checked = tf.identity(sequence_length)
        checked.set_shape([checked.shape[0], num_splits])
        return checked

    # Rank 1: every sequence must be empty or full length.
    if total_length % num_splits != 0:
        raise ValueError(
            '`total_length` must be evenly divisible by `num_splits`.')
    is_empty = tf.equal(sequence_length, 0)
    is_full = tf.equal(sequence_length, total_length)
    all_valid = tf.reduce_all(tf.logical_or(is_empty, is_full))
    validity_check = tf.Assert(all_valid, data=[sequence_length])
    with tf.control_dependencies([validity_check]):
        as_column = tf.expand_dims(sequence_length, axis=1)
        repeated = tf.tile(as_column, [1, num_splits])
        return repeated // num_splits
Пример #7
0
    def __call__(self, batch_size):
        """Read `batch_size` items from the dataset.

        Args:
          batch_size: Tensor of type `int32`, batch size of the data to be
            retrieved from the dataset. Asserted at run time to be less than
            or equal to `self._max_batch_size`.

        Returns:
          The result of `_slice_data(self._dataset, batch_size)`, built so
          that it depends on the batch-size assertion.
        """
        max_allowed = tf.convert_to_tensor(self._max_batch_size,
                                           dtype=tf.int32)
        size_check = tf.assert_less_equal(
            batch_size,
            max_allowed,
            message=
            'Data set read failure, Batch size greater than max allowed.')
        with tf.control_dependencies([size_check]):
            batch = _slice_data(self._dataset, batch_size)
        return batch
Пример #8
0
def psnr(labels, predictions):
  """Streaming mean of the peak signal-to-noise ratio of `predictions`.

  PSNR is computed with a maximum value of 1. Only `labels` is asserted to lie
  in [0, 1]; `predictions` is not range-checked here. Infinite PSNR values are
  dropped before averaging.

  Args:
    labels: Tensor of shape [B, H, W, N].
    predictions: Tensor of shape [B, H, W, N]; its static shape must be
      compatible with that of `labels`.

  Returns:
    Tuple of (psnr, update_op) as returned by `tf.metrics.mean`.
  """
  predictions.shape.assert_is_compatible_with(labels.shape)
  label_range_checks = [
      tf.assert_greater_equal(labels, 0.0),
      tf.assert_less_equal(labels, 1.0),
  ]
  with tf.control_dependencies(label_range_checks):
    per_image = tf.image.psnr(labels, predictions, max_val=1.0)
    is_finite = tf.logical_not(tf.is_inf(per_image))
    finite_only = tf.boolean_mask(per_image, is_finite)
    return tf.metrics.mean(finite_only, name='psnr')
Пример #9
0
  def __call__(self, batch_size):
    """Read a random batch of `batch_size` examples.

    A fresh shuffle of all example indices is stored on `self._indices`, and
    its first `batch_size` entries select the data to extract.

    Args:
      batch_size: Tensor of type `int32`. Batch size of the data to be
        retrieved from the dataset. Asserted at run time to be less than or
        equal to the number of examples in the dataset.

    Returns:
      The result of `_extract_data` for the selected indices.
    """
    num_examples = tf.convert_to_tensor(self._num_examples, dtype=tf.int32)
    size_check = tf.assert_less_equal(
        batch_size,
        num_examples,
        message='Data set read failure, batch_size > num_examples.'
    )
    with tf.control_dependencies([size_check]):
      all_indices = tf.range(self._num_examples, dtype=tf.int32)
      self._indices = tf.random.shuffle(all_indices)
      selected = self._indices[:batch_size]
      return _extract_data(self._dataset, selected)
Пример #10
0
def assert_rank_at_most(x,
                        rank,
                        data=None,
                        summarize=None,
                        message=None,
                        name=None):
    """Assert that the rank of `x` is less than or equal to `rank`.

    Example of adding a dependency to an operation:

    ```python
    with tf.control_dependencies([tf.assert_rank_at_most(x, 2)]):
      output = tf.reduce_sum(x)
    ```

    Args:
      x:  Numeric `Tensor`.
      rank:  Scalar `Tensor`.
      data:  Forwarded to `tf1.assert_less_equal`: the tensors to print out if
        the condition is False.
      summarize: Forwarded to `tf1.assert_less_equal`: print this many entries
        of each tensor.
      message: Forwarded to `tf1.assert_less_equal`: a string to prefix to the
        default message.
      name: A name for this operation (optional).
        Defaults to "assert_rank_at_most".

    Returns:
      Whatever `tf1.assert_less_equal(tf.rank(x), rank, ...)` returns, created
      inside the name scope.
    """
    scope_name = name or 'assert_rank_at_most'
    with tf.name_scope(scope_name):
        actual_rank = tf.rank(x)
        return tf1.assert_less_equal(actual_rank,
                                     rank,
                                     data=data,
                                     summarize=summarize,
                                     message=message)
Пример #11
0
def embed(input_ids,
          vocab_size,
          embedding_size,
          position_offset=0,
          initializer_range=0.02,
          max_position_embeddings=512,
          use_one_hot_embeddings=True):
    """Embed token ids and add learned position embeddings.

    :param input_ids: int Tensor of shape [batch_size, seq_length].
    :param vocab_size: number of words in vocab
    :param embedding_size: dimensionality of the embedding
    :param position_offset: aka number of cached tokens. Positions used for
        the current sequence are
        [position_offset, position_offset + seq_length).
    :param initializer_range: float. Range of the weight initialization.
    :param max_position_embeddings: int. Maximum sequence length, including
        any `position_offset`.
    :param use_one_hot_embeddings: if True, look tokens up with a one-hot
        matmul; otherwise use `tf.nn.embedding_lookup`.
    :return: tuple of (layer-normalized
        [batch_size, seq_length, embedding_size] embedded tensor,
        word embedding table variable)
    """
    (batch_size, seq_length) = get_shape_list(input_ids, expected_rank=2)

    embedding_table = tf.get_variable(
        name='word_embed',
        shape=[vocab_size, embedding_size],
        initializer=create_initializer(initializer_range),
    )

    assert_op = tf.assert_less_equal(tf.reduce_max(input_ids), vocab_size - 1)
    with tf.control_dependencies([assert_op]):
        if use_one_hot_embeddings:
            flat_input_ids = tf.reshape(input_ids, [-1])
            one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size)
            output_flat = tf.matmul(one_hot_input_ids, embedding_table)
        else:
            output_flat = tf.nn.embedding_lookup(embedding_table, input_ids)

        embedded_input = tf.reshape(output_flat,
                                    [batch_size, seq_length, embedding_size])

    # The last position used is `position_offset + seq_length - 1`, so the
    # offset must be part of the bound. Without it, out-of-range positions
    # would be one-hot encoded as all-zero rows and silently receive no
    # position embedding.
    assert_op = tf.assert_less_equal(
        seq_length + position_offset,
        max_position_embeddings,
        message='seq_length + position_offset exceeds '
        'max_position_embeddings')

    with tf.control_dependencies([assert_op]):
        full_position_embeddings = tf.get_variable(
            name='pos_embed',
            shape=[max_position_embeddings, embedding_size],
            initializer=create_initializer(initializer_range),
        )
        # Since the position embedding table is a learned variable, we create it
        # using a (long) sequence length `max_position_embeddings`. The actual
        # sequence length might be shorter than this, for faster training of
        # tasks that do not have long sequences.
        #
        # So `full_position_embeddings` is effectively an embedding table
        # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
        # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
        # perform a slice.
        if position_offset == 0:
            embedded_input += tf.slice(full_position_embeddings, [0, 0],
                                       [seq_length, -1])[None]
        else:
            # With a non-zero offset the rows are selected with a one-hot
            # matmul instead of a slice.
            flat_pos_ids = (tf.range(seq_length, dtype=tf.int32) +
                            position_offset)
            one_hot_pos_ids = tf.one_hot(flat_pos_ids,
                                         depth=max_position_embeddings)

            # [seq_length, max_position_embeddings] x
            # [max_position_embeddings, embedding_size]
            seq_embeds = tf.matmul(one_hot_pos_ids, full_position_embeddings)
            embedded_input += seq_embeds[None]

    return layer_norm(embedded_input, name='embed_norm'), embedding_table
Пример #12
0
    def __init__(self,
                 batch_size,
                 total_num_examples,
                 max_learning_rate=1.,
                 preconditioner_decay_rate=0.95,
                 burnin=25,
                 burnin_max_learning_rate=1e-6,
                 use_single_learning_rate=False,
                 name=None):
        """Convert the hyperparameters to tensors and attach validity checks.

        Every numeric argument is converted with `tf.convert_to_tensor` inside
        a name scope, then re-bound to a version that carries its assertions
        as dependencies (via `distribution_util.with_dependencies`).

        Args:
          batch_size: Asserted to be greater than zero.
          total_num_examples: Asserted to be greater than zero.
          max_learning_rate: Asserted to be non-negative.
          preconditioner_decay_rate: Asserted to lie in [0, 1].
          burnin: Asserted to be a non-negative integer; converted with an
            `int64` dtype hint. Presumably a number of burn-in steps --
            TODO confirm against the code that consumes `self._burnin`.
          burnin_max_learning_rate: Asserted to be non-negative.
          use_single_learning_rate: Stored unchanged on the instance.
          name: Optional name; used for the name scope and passed to the
            parent constructor, falling back to 'VariationalSGD'.
        """
        default_name = 'VariationalSGD'
        with tf1.name_scope(name, default_name, [
                max_learning_rate, preconditioner_decay_rate, batch_size,
                burnin, burnin_max_learning_rate
        ]):
            # Step 1: convert the raw arguments to tensors.
            self._preconditioner_decay_rate = tf.convert_to_tensor(
                value=preconditioner_decay_rate,
                name='preconditioner_decay_rate')
            self._batch_size = tf.convert_to_tensor(value=batch_size,
                                                    name='batch_size')
            self._total_num_examples = tf.convert_to_tensor(
                value=total_num_examples, name='total_num_examples')

            # `burnin` keeps its own dtype if it has one, else int64 is hinted.
            self._burnin = tf.convert_to_tensor(value=burnin,
                                                name='burnin',
                                                dtype=dtype_util.common_dtype(
                                                    [burnin],
                                                    dtype_hint=tf.int64))
            self._burnin_max_learning_rate = tf.convert_to_tensor(
                value=burnin_max_learning_rate,
                name='burnin_max_learning_rate')
            self._max_learning_rate = tf.convert_to_tensor(
                value=max_learning_rate, name='max_learning_rate')
            self._use_single_learning_rate = use_single_learning_rate

            # Step 2: re-bind each tensor so that using it also triggers its
            # range assertions.
            # 0 <= preconditioner_decay_rate <= 1
            self._preconditioner_decay_rate = distribution_util.with_dependencies(
                [
                    tf1.assert_non_negative(
                        self._preconditioner_decay_rate,
                        message=
                        '`preconditioner_decay_rate` must be non-negative'),
                    tf1.assert_less_equal(
                        self._preconditioner_decay_rate,
                        1.,
                        message='`preconditioner_decay_rate` must be at most 1.'
                    ),
                ], self._preconditioner_decay_rate)

            # batch_size > 0
            self._batch_size = distribution_util.with_dependencies([
                tf1.assert_greater(
                    self._batch_size,
                    0,
                    message='`batch_size` must be greater than zero')
            ], self._batch_size)

            # total_num_examples > 0
            self._total_num_examples = distribution_util.with_dependencies([
                tf1.assert_greater(
                    self._total_num_examples,
                    0,
                    message='`total_num_examples` must be greater than zero')
            ], self._total_num_examples)

            # burnin is a non-negative integer
            self._burnin = distribution_util.with_dependencies([
                tf1.assert_non_negative(
                    self._burnin, message='`burnin` must be non-negative'),
                tf1.assert_integer(self._burnin,
                                   message='`burnin` must be an integer')
            ], self._burnin)

            # burnin_max_learning_rate >= 0
            self._burnin_max_learning_rate = distribution_util.with_dependencies(
                [
                    tf1.assert_non_negative(
                        self._burnin_max_learning_rate,
                        message=
                        '`burnin_max_learning_rate` must be non-negative')
                ], self._burnin_max_learning_rate)

            # max_learning_rate >= 0
            self._max_learning_rate = distribution_util.with_dependencies([
                tf1.assert_non_negative(
                    self._max_learning_rate,
                    message='`max_learning_rate` must be non-negative')
            ], self._max_learning_rate)

            super(VariationalSGD, self).__init__(name=name or default_name)
Пример #13
0
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=None,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            reset_position_index_per_cell=False,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            extra_embeddings=None,
                            dropout_prob=0.1):
  """Performs various post-processing on a word embedding tensor.

  Token-type, position and extra embeddings are each added to the input, and
  the sum is passed through `layer_norm_and_dropout`.

  Args:
    input_tensor: float Tensor of shape [batch_size, seq_length,
      embedding_size].
    use_token_type: bool. Whether to add embeddings for `token_type_ids`.
    token_type_ids: (optional) nested structure of int32 Tensors of shape
      [batch_size, seq_length]. Must be specified if `use_token_type` is True.
    token_type_vocab_size: nested structure of ints. The vocabulary size of
      `token_type_ids`. Must match the structure of `token_type_ids`.
    token_type_embedding_name: string. Prefix of the embedding table variable
      names for token type ids; the flattened index is appended as `_<i>`.
    use_position_embeddings: bool. Whether to add position embeddings for the
      position of each token in the sequence.
    reset_position_index_per_cell: bool. Whether to restart position index when
      a new cell starts.
    position_embedding_name: string. The name of the embedding table variable
      for positional embeddings.
    initializer_range: float. Range of the weight initialization.
    max_position_embeddings: int. Maximum sequence length that might ever be
      used with this model. This can be longer than the sequence length of
      input_tensor, but cannot be shorter.
    extra_embeddings: (optional) float32 Tensor of shape [batch_size,
      seq_length, embedding_dim]. Additional embeddings that are projected to
      the input width with a dense layer and added to the other embeddings.
    dropout_prob: float. Dropout probability applied to the final output tensor.

  Returns:
    float tensor with same shape as `input_tensor`.

  Raises:
    ValueError: One of the tensor shapes or input values is invalid.
  """
  input_shape = get_shape_list(input_tensor, expected_rank=3)
  batch_size = input_shape[0]
  seq_length = input_shape[1]
  width = input_shape[2]

  output = input_tensor

  if use_token_type:
    if token_type_ids is None:
      raise ValueError("`token_type_ids` must be specified if "
                       "`use_token_type` is True.")

    tf.nest.assert_same_structure(token_type_ids, token_type_vocab_size)
    token_type_ids = tf.nest.flatten(token_type_ids)
    token_type_vocab_size = tf.nest.flatten(token_type_vocab_size)

    for i, (type_ids, type_vocab_size) in enumerate(
        zip(token_type_ids, token_type_vocab_size)):
      token_type_table = tf.get_variable(
          name="%s_%d" % (token_type_embedding_name, i),
          shape=[type_vocab_size, width],
          initializer=create_initializer(initializer_range))
      # This vocab will be small so we always do one-hot here, since it is
      # always faster for a small vocabulary.
      flat_token_type_ids = tf.reshape(type_ids, [-1])
      one_hot_ids = tf.one_hot(flat_token_type_ids, depth=type_vocab_size)
      token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
      token_type_embeddings = tf.reshape(token_type_embeddings,
                                         [batch_size, seq_length, width])
      output += token_type_embeddings

  if use_position_embeddings:
    full_position_embeddings = tf.get_variable(
        name=position_embedding_name,
        shape=[max_position_embeddings, width],
        initializer=create_initializer(initializer_range))
    if not reset_position_index_per_cell:
      # Absolute positions index the table directly, so the sequence must fit.
      assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
      with tf.control_dependencies([assert_op]):
        num_dims = len(output.shape.as_list())
        position_embeddings = _get_absolute_position_embeddings(
            full_position_embeddings,
            seq_length=seq_length,
            width=width,
            num_dims=num_dims,
        )
    else:
      # NOTE(review): `token_type_ids` / `token_type_vocab_size` are only
      # flattened above when `use_token_type` is True; confirm what the
      # helper expects when it is False.
      position_embeddings = _get_relative_position_embeddings(
          full_position_embeddings,
          token_type_ids,
          token_type_vocab_size,
          seq_length,
          batch_size,
          max_position_embeddings,
      )
    output += position_embeddings

  if extra_embeddings is not None:
    # Project the extra embeddings to `width` so they can be added.
    flat_extra_embeddings = tf.reshape(extra_embeddings,
                                       [batch_size * seq_length, -1])
    flat_extra_embeddings = tf.layers.dense(
        flat_extra_embeddings,
        width,
        kernel_initializer=create_initializer(initializer_range))
    output += tf.reshape(flat_extra_embeddings, [batch_size, seq_length, width])

  output = layer_norm_and_dropout(output, dropout_prob)
  return output
Пример #14
0
def preprocess_example(example_proto, hparams, is_training):
    """Turn a raw example into spectrogram, labels and length tensors.

    Args:
      example_proto: Example that has not been preprocessed.
      hparams: HParams object specifying hyperparameters.
      is_training: Whether or not this is a training run.

    Returns:
      An InputTensors tuple, built under a control dependency on the
      max-length assertion when one is configured for training.
    """
    parsed = parse_example(example_proto)
    sequence_id = parsed['id']
    sequence = parsed['sequence']
    audio = parsed['audio']
    velocity_range = parsed['velocity_range']

    # One jitter amount is drawn and shared by the labels and the audio so
    # that both are shifted together.
    jitter_ms = 0
    if is_training and hparams.jitter_amount_ms > 0:
        jitter_ms = np.random.choice(hparams.jitter_amount_ms, size=1)

    if jitter_ms > 0:
        sequence = jitter_label_op(sequence, jitter_ms / 1000.)

    # possibly shift the entire sequence backward for better forward only training
    if hparams.backward_shift_amount_ms > 0:
        backward_shift_sec = hparams.backward_shift_amount_ms / 1000.
        sequence = jitter_label_op(sequence, backward_shift_sec)

    if is_training:
        audio = transform_wav_data_op(audio,
                                      hparams=hparams,
                                      jitter_amount_sec=jitter_ms / 1000.)

    spec = wav_to_spec_op(audio, hparams=hparams)
    spectrogram_hash = get_spectrogram_hash_op(spec)

    pianoroll = sequence_to_pianoroll_op(sequence,
                                         velocity_range,
                                         hparams=hparams)
    labels, label_weights, onsets, offsets, velocities = pianoroll

    length = wav_to_num_frames_op(audio, hparams_frames_per_second(hparams))

    length_checks = []
    if hparams.max_expected_train_example_len and is_training:
        length_checks.append(
            tf.assert_less_equal(length,
                                 hparams.max_expected_train_example_len))

    with tf.control_dependencies(length_checks):
        return InputTensors(spec=spec,
                            spectrogram_hash=spectrogram_hash,
                            labels=labels,
                            label_weights=label_weights,
                            length=length,
                            onsets=onsets,
                            offsets=offsets,
                            velocities=velocities,
                            sequence_id=sequence_id,
                            note_sequence=sequence)
Пример #15
0
def expected_calibration_error(y_true, y_pred, nbins=20):
    """Streaming Expected Calibration Error (ECE) metric.

    ECE is a scalar summary statistic of calibration error. It is the
    sample-weighted average of the difference between the predicted and true
    probabilities of a positive detection across uniformly-spaced model
    confidences [0, 1]. See referenced paper for a thorough explanation.

    Reference:
      Guo, et. al, "On Calibration of Modern Neural Networks"
      Page 2, Expected Calibration Error (ECE).
      https://arxiv.org/pdf/1706.04599.pdf

    Three metric variables named `bin_counts`, `true_sum` and `preds_sum`
    (each of length `nbins`) accumulate per-bin statistics across calls to the
    update op.

    Args:
      y_true: 1-D tf.int64 Tensor of binarized ground truth, corresponding to
        each prediction in y_pred.
      y_pred: 1-D tf.float32 tensor of model confidence scores in range
        [0.0, 1.0].
      nbins: int specifying the number of uniformly-spaced bins into which
        y_pred will be bucketed.

    Returns:
      value_op: ECE computed from the current values of the three variables.
      update_op: ECE computed from the results of incrementing the three
        variables with the current batch.

    Raises:
      InvalidArgumentError: at run time, if y_pred is not in [0.0, 1.0].
    """
    counts_var = metrics_impl.metric_variable([nbins],
                                              tf.float32,
                                              name='bin_counts')
    true_sum_var = metrics_impl.metric_variable([nbins],
                                                tf.float32,
                                                name='true_sum')
    preds_sum_var = metrics_impl.metric_variable([nbins],
                                                 tf.float32,
                                                 name='preds_sum')

    range_checks = [
        tf.assert_greater_equal(y_pred, 0.0),
        tf.assert_less_equal(y_pred, 1.0),
    ]
    with tf.control_dependencies(range_checks):
        bin_ids = tf.histogram_fixed_width_bins(y_pred, [0.0, 1.0],
                                                nbins=nbins)

    def _accumulate(variable, weights=None):
        # Add this batch's per-bin (optionally weighted) counts to `variable`.
        per_bin = tf.bincount(bin_ids, weights=weights, minlength=nbins)
        return tf.assign_add(variable, tf.cast(per_bin, dtype=tf.float32))

    with tf.control_dependencies([bin_ids]):
        counts_update = _accumulate(counts_var)
        true_sum_update = _accumulate(true_sum_var, weights=y_true)
        preds_sum_update = _accumulate(preds_sum_var, weights=y_pred)

    ece_update_op = _ece_from_bins(counts_update,
                                   true_sum_update,
                                   preds_sum_update,
                                   name='update_op')
    ece = _ece_from_bins(counts_var, true_sum_var, preds_sum_var, name='value')
    return ece, ece_update_op
Пример #16
0
def preprocess_data(sequence_id, sequence, audio, velocity_range, hparams,
                    is_training):
  """Turn sequence/audio inputs into spectrogram, labels and length tensors.

  Args:
    sequence_id: id of the sequence.
    sequence: String tensor containing serialized NoteSequence proto.
    audio: String tensor containing WAV data.
    velocity_range: String tensor containing max and min velocities of file as a
      serialized VelocityRange.
    hparams: HParams object specifying hyperparameters.
    is_training: Whether or not this is a training run.

  Returns:
    An InputTensors tuple, built under a control dependency on the max-length
    assertion when one is configured for training.
  """
  # One jitter amount is drawn and shared by the labels and the audio so that
  # both are shifted together.
  jitter_ms = 0
  if is_training and hparams.jitter_amount_ms > 0:
    jitter_ms = np.random.choice(hparams.jitter_amount_ms, size=1)

  if jitter_ms > 0:
    sequence = jitter_label_op(sequence, jitter_ms / 1000.)

  # possibly shift the entire sequence backward for better forward only training
  if hparams.backward_shift_amount_ms > 0:
    backward_shift_sec = hparams.backward_shift_amount_ms / 1000.
    sequence = jitter_label_op(sequence, backward_shift_sec)

  if is_training:
    audio = transform_wav_data_op(
        audio, hparams=hparams, jitter_amount_sec=jitter_ms / 1000.)

  if hparams.spec_type != 'tflite_compat_mel':
    spec = wav_to_spec_op(audio, hparams=hparams)
  else:
    # The TFLite-compatible mel path requires log-amplitude spectrograms.
    assert hparams.spec_log_amplitude
    spec = tflite_compat_mel(audio, hparams=hparams)
  spectrogram_hash = get_spectrogram_hash_op(spec)

  pianoroll = sequence_to_pianoroll_op(
      sequence, velocity_range, hparams=hparams)
  labels, label_weights, onsets, offsets, velocities = pianoroll

  length = wav_to_num_frames_op(audio, hparams_frames_per_second(hparams))

  length_checks = []
  if hparams.max_expected_train_example_len and is_training:
    length_checks.append(
        tf.assert_less_equal(length, hparams.max_expected_train_example_len))

  with tf.control_dependencies(length_checks):
    return InputTensors(
        spec=spec,
        spectrogram_hash=spectrogram_hash,
        labels=labels,
        label_weights=label_weights,
        length=length,
        onsets=onsets,
        offsets=offsets,
        velocities=velocities,
        sequence_id=sequence_id,
        note_sequence=sequence)
Пример #17
0
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=16,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            dropout_prob=0.1):
    """Adds token-type and position embeddings to a word embedding tensor.

    The optional token-type embeddings and the optional (learned) position
    embeddings are summed onto `input_tensor`; the result is then passed
    through `layer_norm_and_dropout` with `dropout_prob`.

    Args:
      input_tensor: float Tensor of shape [batch_size, seq_length,
        embedding_size].
      use_token_type: bool. Whether to add embeddings for `token_type_ids`.
      token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
        Must be specified if `use_token_type` is True.
      token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
      token_type_embedding_name: string. The name of the embedding table variable
        for token type ids.
      use_position_embeddings: bool. Whether to add position embeddings for the
        position of each token in the sequence.
      position_embedding_name: string. The name of the embedding table variable
        for positional embeddings.
      initializer_range: float. Range of the weight initialization.
      max_position_embeddings: int. Maximum sequence length that might ever be
        used with this model. This can be longer than the sequence length of
        input_tensor, but cannot be shorter.
      dropout_prob: float. Dropout probability applied to the final output tensor.

    Returns:
      float tensor with same shape as `input_tensor`.

    Raises:
      ValueError: If `use_token_type` is True but `token_type_ids` is None.
        Rank validation of `input_tensor` is delegated to `get_shape_list`.
    """
    # Validate the plain-Python arguments first so that a bad call fails
    # before any shape inspection or variable creation takes place.
    if use_token_type and token_type_ids is None:
        raise ValueError("`token_type_ids` must be specified if "
                         "`use_token_type` is True.")

    input_shape = get_shape_list(input_tensor, expected_rank=3)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    width = input_shape[2]

    output = input_tensor

    if use_token_type:
        token_type_table = tf.get_variable(
            name=token_type_embedding_name,
            shape=[token_type_vocab_size, width],
            initializer=create_initializer(initializer_range))
        # This vocab will be small so we always do one-hot here, since it is always
        # faster for a small vocabulary.
        flat_token_type_ids = tf.reshape(token_type_ids, [-1])
        one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size)
        token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
        token_type_embeddings = tf.reshape(token_type_embeddings,
                                           [batch_size, seq_length, width])
        output += token_type_embeddings

    if use_position_embeddings:
        # The ops below are built under a control dependency on this assert,
        # which checks seq_length <= max_position_embeddings.
        assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
        with tf.control_dependencies([assert_op]):
            full_position_embeddings = tf.get_variable(
                name=position_embedding_name,
                shape=[max_position_embeddings, width],
                initializer=create_initializer(initializer_range))
            # Since the position embedding table is a learned variable, we create it
            # using a (long) sequence length `max_position_embeddings`. The actual
            # sequence length might be shorter than this, for faster training of
            # tasks that do not have long sequences.
            #
            # So `full_position_embeddings` is effectively an embedding table
            # for position [0, 1, 2, ..., max_position_embeddings-1], and the current
            # sequence has positions [0, 1, 2, ... seq_length-1], so we can just
            # perform a slice.
            position_embeddings = tf.slice(full_position_embeddings, [0, 0],
                                           [seq_length, -1])
            num_dims = len(output.shape.as_list())

            # Only the last two dimensions are relevant (`seq_length` and `width`), so
            # we broadcast among the first dimensions, which is typically just
            # the batch size.
            position_broadcast_shape = [1] * (num_dims - 2) + [seq_length, width]
            position_embeddings = tf.reshape(position_embeddings,
                                             position_broadcast_shape)
            output += position_embeddings

    output = layer_norm_and_dropout(output, dropout_prob)
    return output
Пример #18
0
    def __init__(self,
                 learning_rate,
                 preconditioner_decay_rate=0.95,
                 data_size=1,
                 burnin=25,
                 diagonal_bias=1e-8,
                 name=None,
                 parallel_iterations=10):
        """Builds the hyperparameter tensors and attaches their assertions.

        Each numeric hyperparameter is converted to a tensor inside a name
        scope. All of them except `learning_rate` are then passed, together
        with their assertion ops, through
        `distribution_util.with_dependencies` before being stored.

        Args:
          learning_rate: Converted to a tensor and stored without checks.
          preconditioner_decay_rate: Asserted to be non-negative and at most 1.
          data_size: Asserted to be greater than zero.
          burnin: Asserted to be a non-negative integer. Its dtype comes from
            `dtype_util.common_dtype` with an int64 hint.
          diagonal_bias: Asserted to be non-negative.
          name: Optional name for the scope and the optimizer. Falls back to
            'StochasticGradientLangevinDynamics'.
          parallel_iterations: Stored unchanged on the instance.

        Raises:
          NotImplementedError: If eager execution is enabled.
        """
        default_name = 'StochasticGradientLangevinDynamics'
        scope_values = [
            learning_rate, preconditioner_decay_rate, data_size, burnin,
            diagonal_bias
        ]
        with tf1.name_scope(name, default_name, scope_values):
            if tf.executing_eagerly():
                raise NotImplementedError(
                    'Eager execution currently not supported for '
                    ' SGLD optimizer.')

            # Phase 1: turn every hyperparameter into a tensor.
            decay_rate = tf.convert_to_tensor(
                value=preconditioner_decay_rate,
                name='preconditioner_decay_rate')
            num_data = tf.convert_to_tensor(value=data_size, name='data_size')
            burnin_dtype = dtype_util.common_dtype([burnin],
                                                   dtype_hint=tf.int64)
            burnin_steps = tf.convert_to_tensor(value=burnin,
                                                name='burnin',
                                                dtype=burnin_dtype)
            bias = tf.convert_to_tensor(value=diagonal_bias,
                                        name='diagonal_bias')
            # TODO(b/124800185): Consider migrating `learning_rate` to be a
            # hyperparameter handled by the base Optimizer class. This would allow
            # users to plug in a `tf.keras.optimizers.schedules.LearningRateSchedule`
            # object in addition to Tensors.
            self._learning_rate = tf.convert_to_tensor(value=learning_rate,
                                                       name='learning_rate')
            self._parallel_iterations = parallel_iterations

            # Phase 2: pair each tensor with its assertions and store the
            # result returned by `with_dependencies`.
            decay_rate_checks = [
                tf1.assert_non_negative(
                    decay_rate,
                    message='`preconditioner_decay_rate` must be non-negative'),
                tf1.assert_less_equal(
                    decay_rate,
                    1.,
                    message='`preconditioner_decay_rate` must be at most 1.'),
            ]
            self._preconditioner_decay_rate = (
                distribution_util.with_dependencies(decay_rate_checks,
                                                    decay_rate))

            data_size_checks = [
                tf1.assert_greater(
                    num_data,
                    0,
                    message='`data_size` must be greater than zero'),
            ]
            self._data_size = distribution_util.with_dependencies(
                data_size_checks, num_data)

            burnin_checks = [
                tf1.assert_non_negative(
                    burnin_steps, message='`burnin` must be non-negative'),
                tf1.assert_integer(burnin_steps,
                                   message='`burnin` must be an integer'),
            ]
            self._burnin = distribution_util.with_dependencies(
                burnin_checks, burnin_steps)

            bias_checks = [
                tf1.assert_non_negative(
                    bias, message='`diagonal_bias` must be non-negative'),
            ]
            self._diagonal_bias = distribution_util.with_dependencies(
                bias_checks, bias)

            super(StochasticGradientLangevinDynamics,
                  self).__init__(name=name or default_name)
Пример #19
0
def percentile(x,
               q,
               axis=None,
               interpolation=None,
               keep_dims=False,
               validate_args=False,
               preserve_gradients=True,
               name=None):
    """Compute the `q`-th percentile(s) of `x`.

  Given a vector `x`, the `q`-th percentile of `x` is the value `q / 100` of the
  way from the minimum to the maximum in a sorted copy of `x`.

  The values and distances of the two nearest neighbors as well as the
  `interpolation` parameter will determine the percentile if the normalized
  ranking does not match the location of `q` exactly.

  This function is the same as the median if `q = 50`, the same as the minimum
  if `q = 0` and the same as the maximum if `q = 100`.

  Multiple percentiles can be computed at once by using `1-D` vector `q`.
  Dimension zero of the returned `Tensor` will index the different percentiles.

  Compare to `numpy.percentile`.

  Args:
    x:  Numeric `N-D` `Tensor` with `N > 0`.  If `axis` is not `None`,
      `x` must have statically known number of dimensions.
    q:  Scalar or vector `Tensor` with values in `[0, 100]`. The percentile(s).
      It is cast to `float64` internally.
    axis:  Optional `0-D` or `1-D` integer `Tensor` with constant values. The
      axes that index independent samples over which to return the desired
      percentile.  If `None` (the default), treat every dimension as a sample
      dimension, returning a scalar.
    interpolation : {'nearest', 'linear', 'lower', 'higher', 'midpoint'}.
      Default value: 'nearest'.  This specifies the interpolation method to
      use when the desired quantile lies between two data points `i < j`:
        * linear: i + (j - i) * fraction, where fraction is the fractional part
          of the index surrounded by i and j.
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j`, whichever is nearest.
        * midpoint: (i + j) / 2.
      `linear` and `midpoint` interpolation do not work with integer dtypes.
    keep_dims:  Python `bool`. If `True`, the last dimension is kept with size 1.
      If `False`, the last dimension is removed from the output shape.
    validate_args:  Whether to add runtime checks of argument validity. If
      False, and arguments are incorrect, correct behavior is not guaranteed.
    preserve_gradients:  Python `bool`.  If `True`, ensure that gradient w.r.t
      the percentile `q` is preserved in the case of linear interpolation.
      If `False`, the gradient will be (incorrectly) zero when `q` corresponds
      to a point in `x`.
    name:  A Python string name to give this `Op`.  Default is 'percentile'.

  Returns:
    A `(rank(q) + N - len(axis))` dimensional `Tensor` of same dtype as `x`, or,
      if `axis` is `None`, a `rank(q)` `Tensor`.  The first `rank(q)` dimensions
      index quantiles for different values of `q`.

  Raises:
    ValueError:  If argument 'interpolation' is not an allowed type.
    TypeError:  If `interpolation` is 'linear' or 'midpoint' and `x` has an
      integer dtype.

  #### Examples

  ```python
  # Get 30th percentile with default ('nearest') interpolation.
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=30.)
  ==> 2.0

  # Get 30th percentile with 'linear' interpolation.
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=30., interpolation='linear')
  ==> 1.9

  # Get 30th and 70th percentiles with 'lower' interpolation
  x = [1., 2., 3., 4.]
  tfp.stats.percentile(x, q=[30., 70.], interpolation='lower')
  ==> [1., 3.]

  # Get 100th percentile (maximum).  By default, this is computed over every dim
  x = [[1., 2.],
       [3., 4.]]
  tfp.stats.percentile(x, q=100.)
  ==> 4.

  # Treat the leading dim as indexing samples, and find the 100th quantile (max)
  # over all such samples.
  x = [[1., 2.],
       [3., 4.]]
  tfp.stats.percentile(x, q=100., axis=[0])
  ==> [3., 4.]
  ```

  """
    name = name or 'percentile'
    allowed_interpolations = {
        'linear', 'lower', 'higher', 'nearest', 'midpoint'
    }

    if interpolation is None:
        interpolation = 'nearest'
    else:
        if interpolation not in allowed_interpolations:
            raise ValueError(
                'Argument `interpolation` must be in %s.  Found %s' %
                (allowed_interpolations, interpolation))

    with tf1.name_scope(name, values=[x, q]):
        x = tf.convert_to_tensor(value=x, name='x')

        # 'linear' and 'midpoint' produce values between data points, so they
        # are rejected for integer inputs.
        if interpolation in {'linear', 'midpoint'} and x.dtype.is_integer:
            raise TypeError(
                '{} interpolation not allowed with dtype {}'.format(
                    interpolation, x.dtype))

        # Double is needed here and below, else we get the wrong index if the array
        # is huge along axis.
        q = tf.cast(q, tf.float64)
        # Called for its check only; the return value is discarded.
        _get_static_ndims(q, expect_ndims_no_more_than=1)

        if validate_args:
            # Runtime checks: q is scalar or vector and lies in [0, 100].
            q = distribution_util.with_dependencies([
                tf1.assert_rank_in(q, [0, 1]),
                tf1.assert_greater_equal(q, tf.cast(0., tf.float64)),
                tf1.assert_less_equal(q, tf.cast(100., tf.float64))
            ], q)

        # Move `axis` dims of `x` to the rightmost, call it `y`.
        if axis is None:
            y = tf.reshape(x, [-1])
        else:
            x_ndims = _get_static_ndims(x,
                                        expect_static=True,
                                        expect_ndims_at_least=1)
            axis = _make_static_axis_non_negative_list(axis, x_ndims)
            y = _move_dims_to_flat_end(x, axis, x_ndims, right_end=True)

        # Indices below are computed from this complementary fraction because
        # `sorted_y` is ordered highest to lowest (see `_get_indices`).
        frac_at_q_or_above = 1. - q / 100.

        # Sort everything, not just the top 'k' entries, which allows multiple calls
        # to sort only once (under the hood) and use CSE.
        sorted_y = _sort_tensor(y)

        # Number of samples along the last axis of `y`, as a double.
        d = tf.cast(tf.shape(input=y)[-1], tf.float64)

        def _get_indices(interp_type):
            """Get values of y at the indices implied by interp_type."""
            # Only 'lower', 'higher' and 'nearest' are handled; every call site
            # below passes one of these. Any other value would leave `indices`
            # unbound.
            # Note `lower` <--> ceiling.  Confusing, huh?  Due to the fact that
            # _sort_tensor sorts highest to lowest, tf.ceil corresponds to the higher
            # index, but the lower value of y!
            if interp_type == 'lower':
                indices = tf.math.ceil((d - 1) * frac_at_q_or_above)
            elif interp_type == 'higher':
                indices = tf.floor((d - 1) * frac_at_q_or_above)
            elif interp_type == 'nearest':
                indices = tf.round((d - 1) * frac_at_q_or_above)
            # d - 1 will be distinct from d in int32, but not necessarily double.
            # So clip to avoid out of bounds errors.
            return tf.clip_by_value(tf.cast(indices, tf.int32), 0,
                                    tf.shape(input=y)[-1] - 1)

        if interpolation in ['nearest', 'lower', 'higher']:
            gathered_y = tf.gather(sorted_y,
                                   _get_indices(interpolation),
                                   axis=-1)
        elif interpolation == 'midpoint':
            gathered_y = 0.5 * (
                tf.gather(sorted_y, _get_indices('lower'), axis=-1) +
                tf.gather(sorted_y, _get_indices('higher'), axis=-1))
        elif interpolation == 'linear':
            # Copy-paste of docstring on interpolation:
            # linear: i + (j - i) * fraction, where fraction is the fractional part
            # of the index surrounded by i and j.
            # NOTE: "larger"/"smaller" in the names below refer to the index
            # position in `sorted_y`, not to the value stored there.
            larger_y_idx = _get_indices('lower')
            exact_idx = (d - 1) * frac_at_q_or_above
            if preserve_gradients:
                # If q corresponds to a point in x, we will initially have
                # larger_y_idx == smaller_y_idx.
                # This results in the gradient w.r.t. fraction being zero (recall `q`
                # enters only through `fraction`...and see that things cancel).
                # The fix is to ensure that smaller_y_idx and larger_y_idx are always
                # separated by exactly 1.
                smaller_y_idx = tf.maximum(larger_y_idx - 1, 0)
                larger_y_idx = tf.minimum(smaller_y_idx + 1,
                                          tf.shape(input=y)[-1] - 1)
                fraction = tf.cast(larger_y_idx, tf.float64) - exact_idx
            else:
                smaller_y_idx = _get_indices('higher')
                fraction = tf.math.ceil(
                    (d - 1) * frac_at_q_or_above) - exact_idx

            # Index math was done in float64; cast back to the data dtype before
            # blending the two gathered neighbours.
            fraction = tf.cast(fraction, y.dtype)
            gathered_y = (
                tf.gather(sorted_y, larger_y_idx, axis=-1) * (1 - fraction) +
                tf.gather(sorted_y, smaller_y_idx, axis=-1) * fraction)

        # Propagate NaNs
        if x.dtype in (tf.bfloat16, tf.float16, tf.float32, tf.float64):
            # Apparently tf.is_nan doesn't like other dtypes
            nan_batch_members = tf.reduce_any(input_tensor=tf.math.is_nan(x),
                                              axis=axis)
            # Append rank(q) trailing size-1 dims to the mask's shape so that it
            # can broadcast against `gathered_y`.
            right_rank_matched_shape = tf.pad(
                tensor=tf.shape(input=nan_batch_members),
                paddings=[[0, tf.rank(input=q)]],
                constant_values=1)
            nan_batch_members = tf.reshape(nan_batch_members,
                                           shape=right_rank_matched_shape)
            nan = np.array(np.nan, gathered_y.dtype.as_numpy_dtype)
            gathered_y = tf.where(nan_batch_members, nan, gathered_y)

        # Expand dimensions if requested
        if keep_dims:
            if axis is None:
                # `ones_vec` is a shape vector [1, 1, ..., 1] of length
                # ndims(x) + ndims(q); multiplying by an all-ones tensor of that
                # shape broadcasts `gathered_y` up to that rank.
                ones_vec = tf.ones(shape=[
                    _get_best_effort_ndims(x) + _get_best_effort_ndims(q)
                ],
                                   dtype=tf.int32)
                gathered_y *= tf.ones(ones_vec, dtype=x.dtype)
            else:
                gathered_y = _insert_back_keep_dims(gathered_y, axis)

        # If q is a scalar, then result has the right shape.
        # If q is a vector, then result has trailing dim of shape q.shape, which
        # needs to be rotated to dim 0.
        return distribution_util.rotate_transpose(gathered_y, tf.rank(q))