示例#1
0
 def lazy_tensor():
     """Build the audio summary tensor from values in the enclosing scope.

     Uses `data`, `max_outputs` and `sample_rate` from the enclosing scope:
     keeps at most the first `max_outputs` clips of `data`, runs
     `audio_ops.encode_wav` on each kept clip, and pairs every encoded clip
     with an empty label string.

     Returns:
       The transpose of the stacked `[encoded_clips, labels]` tensors, i.e.
       one `(encoded clip, '')` row per kept clip.
     """
     tf.debugging.assert_rank(data, 3)
     tf.debugging.assert_non_negative(max_outputs)
     clips = data[:max_outputs]
     wav_encoder = functools.partial(
         audio_ops.encode_wav, sample_rate=sample_rate)
     per_clip_wav = tf.map_fn(
         wav_encoder, clips, dtype=tf.string, name='encode_each_audio')

     def _keep_encoded():
         return per_clip_wav

     def _empty_strings():
         return tf.constant([], tf.string)

     # Workaround for map_fn returning float dtype for an empty elems input:
     # substitute an explicitly string-typed empty tensor in that case.
     has_clips = tf.shape(input=per_clip_wav)[0] > 0
     wav_column = tf.cond(has_clips, _keep_encoded, _empty_strings)
     # One empty label per kept clip.
     label_column = tf.tile([''], tf.shape(input=clips)[:1])
     stacked = tf.stack([wav_column, label_column])
     return tf.transpose(a=stacked)
示例#2
0
        def lazy_tensor():
            """Build the audio summary tensor from values in the enclosing scope.

            Uses `data`, `max_outputs`, `sample_rate` and `lengths` from the
            enclosing scope. At most the first `max_outputs` clips of `data`
            are kept. When `lengths` is not None, each kept clip is sliced to
            its corresponding length before being passed to
            `audio_ops.encode_wav`; otherwise whole clips are encoded. Every
            encoded clip is paired with an empty label string.

            Returns:
              The transpose of the stacked `[encoded_clips, labels]` tensors,
              i.e. one `(encoded clip, "")` row per kept clip.
            """
            tf.debugging.assert_rank(data, 3)
            tf.debugging.assert_non_negative(max_outputs)
            clips = data[:max_outputs]

            wav_encoder = functools.partial(
                audio_ops.encode_wav, sample_rate=sample_rate
            )

            # Pick what to map over first so that map_fn is invoked only once.
            if lengths is None:
                map_target = wav_encoder
                map_inputs = clips
            else:
                tf.debugging.assert_rank(lengths, 1)
                clip_lengths = lengths[:max_outputs]

                def _encode_trimmed(clip_and_length):
                    clip, length = clip_and_length
                    return wav_encoder(clip[:length])

                map_target = _encode_trimmed
                map_inputs = (clips, clip_lengths)

            per_clip_wav = tf.map_fn(
                map_target,
                map_inputs,
                dtype=tf.string,
                name="encode_each_audio",
            )

            def _keep_encoded():
                return per_clip_wav

            def _empty_strings():
                return tf.constant([], tf.string)

            # Workaround for map_fn returning float dtype for an empty elems
            # input: substitute an explicitly string-typed empty tensor.
            has_clips = tf.shape(input=per_clip_wav)[0] > 0
            wav_column = tf.cond(has_clips, _keep_encoded, _empty_strings)
            # One empty label per kept clip.
            label_column = tf.tile([""], tf.shape(input=clips)[:1])
            stacked = tf.stack([wav_column, label_column])
            return tf.transpose(a=stacked)
示例#3
0
def audio(name,
          data,
          sample_rate,
          step,
          max_outputs=3,
          encoding=None,
          description=None):
    """Write an audio summary.

    Args:
      name: A name for this summary. The summary tag used for TensorBoard
        will be this name prefixed by any active name scopes.
      data: A `Tensor` representing audio data with shape `[k, t, c]`,
        where `k` is the number of audio clips, `t` is the number of
        frames, and `c` is the number of channels. Elements should be
        floating-point values in `[-1.0, 1.0]`. Any of the dimensions may
        be statically unknown (i.e., `None`).
      sample_rate: An `int` or rank-0 `int32` `Tensor` that represents the
        sample rate, in Hz. Must be positive.
      step: Required `int64`-castable monotonic step value.
      max_outputs: Optional `int` or rank-0 integer `Tensor`. At most this
        many audio clips will be emitted at each step. When more than
        `max_outputs` many clips are provided, the first `max_outputs`
        many clips will be used and the rest silently discarded.
      encoding: Optional constant `str` for the desired encoding. Only
        "wav" is currently supported, but this is not guaranteed to remain
        the default, so if you want "wav" in particular, set this
        explicitly.
      description: Optional long-form description for this summary, as a
        constant `str`. Markdown is supported. Defaults to empty.

    Returns:
      True on success, or false if no summary was emitted because no
      default summary writer was available.

    Raises:
      ValueError: If `encoding` is neither `None` nor "wav".
    """
    # TODO(nickfelt): get encode_wav() exported in the public API.
    from tensorflow.python.ops import gen_audio_ops

    # `None` selects the default encoding, which is currently "wav"; any
    # other explicit value is rejected.
    if encoding is not None and encoding != 'wav':
        raise ValueError('Unknown encoding: %r' % encoding)

    wav_metadata = metadata.create_summary_metadata(
        display_name=None,
        description=description,
        encoding=metadata.Encoding.Value('WAV'))
    scope_values = [data, sample_rate, max_outputs, step]
    with tf.summary.summary_scope(
            name, 'audio_summary', values=scope_values) as (tag, _):
        tf.debugging.assert_rank(data, 3)
        tf.debugging.assert_non_negative(max_outputs)
        clips = data[:max_outputs]
        wav_encoder = functools.partial(
            gen_audio_ops.encode_wav, sample_rate=sample_rate)
        per_clip_wav = tf.map_fn(
            wav_encoder, clips, dtype=tf.string, name='encode_each_audio')

        def _keep_encoded():
            return per_clip_wav

        def _empty_strings():
            return tf.constant([], tf.string)

        # Workaround for map_fn returning float dtype for an empty elems
        # input: substitute an explicitly string-typed empty tensor.
        has_clips = tf.shape(input=per_clip_wav)[0] > 0
        wav_column = tf.cond(has_clips, _keep_encoded, _empty_strings)
        # One empty label per kept clip.
        label_column = tf.tile([''], tf.shape(input=clips)[:1])
        stacked = tf.stack([wav_column, label_column])
        summary_tensor = tf.transpose(a=stacked)
        return tf.summary.write(
            tag=tag,
            tensor=summary_tensor,
            step=step,
            metadata=wav_metadata)