Example #1
def frame(signal, frame_length, frame_step, pad_end=False, pad_value=0, axis=-1,
          name=None):
  """Expands `signal`'s `axis` dimension into frames of `frame_length`.

  Slides a window of size `frame_length` over `signal`'s `axis` dimension
  with a stride of `frame_step`, replacing the `axis` dimension with
  `[frames, frame_length]` frames.

  If `pad_end` is True, window positions that are past the end of the `axis`
  dimension are padded with `pad_value` until the window moves fully past the
  end of the dimension. Otherwise, only window positions that fully overlap the
  `axis` dimension are produced.

  For example:

  ```python
  # A batch of 9152-sample signals.
  pcm = tf.placeholder(tf.float32, [None, 9152])
  # 49 frames of 512 samples each, hopped by 180 samples: [None, 49, 512].
  frames = tf.signal.frame(pcm, 512, 180)
  # Magnitude spectrogram of each frame: [None, 49, 257].
  magspec = tf.abs(tf.signal.rfft(frames, [512]))
  # Add a trailing channel dimension for use as an image: [None, 49, 257, 1].
  image = tf.expand_dims(magspec, 3)
  ```

  Args:
    signal: A `[..., samples, ...]` `Tensor`. The rank and dimensions
      may be unknown. Rank must be at least 1.
    frame_length: The frame length in samples. An integer or scalar `Tensor`.
    frame_step: The frame hop size in samples. An integer or scalar `Tensor`.
    pad_end: Whether to pad the end of `signal` with `pad_value`.
    pad_value: An optional scalar `Tensor` to use where the input signal
      does not exist when `pad_end` is True.
    axis: A scalar integer `Tensor` indicating the axis to frame. Defaults to
      the last axis. Supports negative values for indexing from the end.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of frames with shape `[..., frames, frame_length, ...]`.

  Raises:
    ValueError: If `frame_length`, `frame_step`, `pad_value`, or `axis` are not
      scalar.
  """
  with ops.name_scope(name, "frame", [signal, frame_length, frame_step,
                                      pad_value]):
    signal = ops.convert_to_tensor(signal, name="signal")
    frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
    frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
    axis = ops.convert_to_tensor(axis, name="axis")

    signal.shape.with_rank_at_least(1)
    frame_length.shape.assert_has_rank(0)
    frame_step.shape.assert_has_rank(0)
    axis.shape.assert_has_rank(0)

    result_shape = _infer_frame_shape(signal, frame_length, frame_step, pad_end,
                                      axis)

    # Axis can be negative. Convert it to positive.
    signal_rank = array_ops.rank(signal)
    axis = math_ops.range(signal_rank)[axis]

    signal_shape = array_ops.shape(signal)
    outer_dimensions, length_samples, inner_dimensions = array_ops.split(
        signal_shape, [axis, 1, signal_rank - 1 - axis])
    length_samples = array_ops.reshape(length_samples, [])
    num_outer_dimensions = array_ops.size(outer_dimensions)
    num_inner_dimensions = array_ops.size(inner_dimensions)

    # If padding is requested, pad the input signal tensor with pad_value.
    if pad_end:
      pad_value = ops.convert_to_tensor(pad_value, signal.dtype)
      pad_value.shape.assert_has_rank(0)

      # Calculate number of frames, using double negatives to round up.
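      # (-(-a // b) equals ceil(a / b) for positive b; e.g. -(-9152 // 180) == 51.)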
      num_frames = -(-length_samples // frame_step)

      # Pad the signal by up to frame_length samples, based on how many samples
      # remain past the start of the last frame.
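      # For a 9152-sample signal: 512 + 180 * (51 - 1) - 9152 = 360 pad samples.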
      pad_samples = math_ops.maximum(
          0, frame_length + frame_step * (num_frames - 1) - length_samples)

      # Pad the axis dimension of signal by pad_samples.
      paddings = array_ops.concat(
          [array_ops.zeros([num_outer_dimensions, 2], dtype=pad_samples.dtype),
           [[0, pad_samples]],
           array_ops.zeros([num_inner_dimensions, 2], dtype=pad_samples.dtype)],
          0)
      signal = array_ops.pad(signal, paddings, constant_values=pad_value)

      signal_shape = array_ops.shape(signal)
      length_samples = signal_shape[axis]
    else:
      num_frames = math_ops.maximum(
          0, 1 + (length_samples - frame_length) // frame_step)

    subframe_length = util_ops.gcd(frame_length, frame_step)
    subframes_per_frame = frame_length // subframe_length
    subframes_per_hop = frame_step // subframe_length
    num_subframes = length_samples // subframe_length
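    # For example, with frame_length=512 and frame_step=180: subframe_length is
    # gcd(512, 180) = 4, subframes_per_frame is 128, and subframes_per_hop is 45.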

    slice_shape = array_ops.concat([outer_dimensions,
                                    [num_subframes * subframe_length],
                                    inner_dimensions], 0)
    subframe_shape = array_ops.concat([outer_dimensions,
                                       [num_subframes, subframe_length],
                                       inner_dimensions], 0)
    subframes = array_ops.reshape(array_ops.strided_slice(
        signal, array_ops.zeros_like(signal_shape),
        slice_shape), subframe_shape)

    # frame_selector is a [num_frames, 1] tensor that, when broadcast against
    # subframe_selector, indexes the first subframe of each frame. Broadcast
    # example: [[0, 0, 0, 0], [2, 2, 2, 2], [4, 4, 4, 4]]
    frame_selector = array_ops.reshape(
        math_ops.range(num_frames) * subframes_per_hop, [num_frames, 1])

    # subframe_selector is a [1, subframes_per_frame] tensor that, when
    # broadcast against frame_selector, indexes each subframe within a frame.
    # Broadcast example: [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]
    subframe_selector = array_ops.reshape(
        math_ops.range(subframes_per_frame), [1, subframes_per_frame])

    # Adding the two selector tensors broadcasts them into a [num_frames,
    # subframes_per_frame] tensor of indices to use with tf.gather to select
    # subframes from the subframes tensor. We then reshape the inner-most
    # subframes_per_frame dimension to stitch the subframes together into
    # frames. For example: [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]].
    selector = frame_selector + subframe_selector

    frames = array_ops.reshape(
        array_ops.gather(subframes, selector, axis=axis),
        array_ops.concat([outer_dimensions, [num_frames, frame_length],
                          inner_dimensions], 0))

    if result_shape:
      frames.set_shape(result_shape)
    return frames
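
For intuition, the unpadded path of `frame` is equivalent to the following minimal NumPy sketch over the last axis (`frame_np` is a hypothetical helper written for illustration; it is not part of TensorFlow):

```python
import numpy as np

def frame_np(signal, frame_length, frame_step):
  """Frames `signal` along its last axis, keeping only full frames."""
  # Number of windows that fully overlap the last axis (assumes at least one).
  num_frames = 1 + (signal.shape[-1] - frame_length) // frame_step
  # One strided window per frame; result shape [..., num_frames, frame_length].
  return np.stack(
      [signal[..., i * frame_step:i * frame_step + frame_length]
       for i in range(num_frames)], axis=-2)

print(frame_np(np.arange(10), 4, 2))
# [[0 1 2 3]
#  [2 3 4 5]
#  [4 5 6 7]
#  [6 7 8 9]]
```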
Example #2
def frame(signal,
          frame_length,
          frame_step,
          pad_end=False,
          pad_value=0,
          axis=-1,
          name=None):
    """Expands `signal`'s `axis` dimension into frames of `frame_length`.

  Slides a window of size `frame_length` over `signal`'s `axis` dimension
  with a stride of `frame_step`, replacing the `axis` dimension with
  `[frames, frame_length]` frames.

  If `pad_end` is True, window positions that are past the end of the `axis`
  dimension are padded with `pad_value` until the window moves fully past the
  end of the dimension. Otherwise, only window positions that fully overlap the
  `axis` dimension are produced.

  For example:

  ```python
  # A batch size 3 tensor of 9152 audio samples.
  audio = tf.random.normal([3, 9152])

  # Compute overlapping frames of length 512 with a step of 180 (frames overlap
  # by 332 samples). By default, only 49 frames are generated, since a frame
  # starting at sample 49 * 180 = 8820 would run past the end of the signal.
  frames = tf.signal.frame(audio, 512, 180)
  frames.shape.assert_is_compatible_with([3, 49, 512])

  # When pad_end is enabled, the final two frames are kept (padded with zeros),
  # for a total of ceil(9152 / 180) = 51 frames.
  frames = tf.signal.frame(audio, 512, 180, pad_end=True)
  frames.shape.assert_is_compatible_with([3, 51, 512])
  ```

  Args:
    signal: A `[..., samples, ...]` `Tensor`. The rank and dimensions
      may be unknown. Rank must be at least 1.
    frame_length: The frame length in samples. An integer or scalar `Tensor`.
    frame_step: The frame hop size in samples. An integer or scalar `Tensor`.
    pad_end: Whether to pad the end of `signal` with `pad_value`.
    pad_value: An optional scalar `Tensor` to use where the input signal
      does not exist when `pad_end` is True.
    axis: A scalar integer `Tensor` indicating the axis to frame. Defaults to
      the last axis. Supports negative values for indexing from the end.
    name: An optional name for the operation.

  Returns:
    A `Tensor` of frames with shape `[..., frames, frame_length, ...]`.

  Raises:
    ValueError: If `frame_length`, `frame_step`, `pad_value`, or `axis` are not
      scalar.
  """
    with ops.name_scope(name, "frame",
                        [signal, frame_length, frame_step, pad_value]):
        signal = ops.convert_to_tensor(signal, name="signal")
        frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
        frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
        axis = ops.convert_to_tensor(axis, name="axis")

        signal.shape.with_rank_at_least(1)
        frame_length.shape.assert_has_rank(0)
        frame_step.shape.assert_has_rank(0)
        axis.shape.assert_has_rank(0)

        result_shape = _infer_frame_shape(signal, frame_length, frame_step,
                                          pad_end, axis)

        # Axis can be negative. Convert it to positive.
        signal_rank = array_ops.rank(signal)
        axis = math_ops.range(signal_rank)[axis]

        signal_shape = array_ops.shape(signal)
        outer_dimensions, length_samples, inner_dimensions = array_ops.split(
            signal_shape, [axis, 1, signal_rank - 1 - axis])
        length_samples = array_ops.reshape(length_samples, [])
        num_outer_dimensions = array_ops.size(outer_dimensions)
        num_inner_dimensions = array_ops.size(inner_dimensions)

        # If padding is requested, pad the input signal tensor with pad_value.
        if pad_end:
            pad_value = ops.convert_to_tensor(pad_value, signal.dtype)
            pad_value.shape.assert_has_rank(0)

            # Calculate number of frames, using double negatives to round up.
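            # (-(-a // b) equals ceil(a / b) for positive b; e.g.
            # -(-9152 // 180) == 51.)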
            num_frames = -(-length_samples // frame_step)

            # Pad the signal by up to frame_length samples, based on how many
            # samples remain past the start of the last frame.
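            # For a 9152-sample signal: 512 + 180 * (51 - 1) - 9152 = 360
            # pad samples.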
            pad_samples = math_ops.maximum(
                0,
                frame_length + frame_step * (num_frames - 1) - length_samples)

            # Pad the axis dimension of signal by pad_samples.
            paddings = array_ops.concat([
                array_ops.zeros([num_outer_dimensions, 2],
                                dtype=pad_samples.dtype), [[0, pad_samples]],
                array_ops.zeros([num_inner_dimensions, 2],
                                dtype=pad_samples.dtype)
            ], 0)
            signal = array_ops.pad(signal, paddings, constant_values=pad_value)

            signal_shape = array_ops.shape(signal)
            length_samples = signal_shape[axis]
        else:
            num_frames = math_ops.maximum(
                0, 1 + (length_samples - frame_length) // frame_step)

        subframe_length = util_ops.gcd(frame_length, frame_step)
        subframes_per_frame = frame_length // subframe_length
        subframes_per_hop = frame_step // subframe_length
        num_subframes = length_samples // subframe_length
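        # For example, with frame_length=512 and frame_step=180: subframe_length
        # is gcd(512, 180) = 4, subframes_per_frame is 128, subframes_per_hop 45.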

        slice_shape = array_ops.concat([
            outer_dimensions, [num_subframes * subframe_length],
            inner_dimensions
        ], 0)
        subframe_shape = array_ops.concat([
            outer_dimensions, [num_subframes, subframe_length],
            inner_dimensions
        ], 0)
        subframes = array_ops.reshape(
            array_ops.strided_slice(signal, array_ops.zeros_like(signal_shape),
                                    slice_shape), subframe_shape)

        # frame_selector is a [num_frames, 1] tensor that, when broadcast
        # against subframe_selector, indexes the first subframe of each frame.
        # Broadcast example: [[0, 0, 0, 0], [2, 2, 2, 2], [4, 4, 4, 4]]
        frame_selector = array_ops.reshape(
            math_ops.range(num_frames) * subframes_per_hop, [num_frames, 1])

        # subframe_selector is a [1, subframes_per_frame] tensor that, when
        # broadcast against frame_selector, indexes each subframe within a
        # frame. Broadcast example: [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]]
        subframe_selector = array_ops.reshape(
            math_ops.range(subframes_per_frame), [1, subframes_per_frame])

        # Adding the two selector tensors broadcasts them into a [num_frames,
        # subframes_per_frame] tensor of indices to use with tf.gather to
        # select subframes from the subframes tensor. We then reshape the
        # inner-most subframes_per_frame dimension to stitch the subframes
        # together into frames. For example:
        # [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]].
        selector = frame_selector + subframe_selector

        frames = array_ops.reshape(
            array_ops.gather(subframes, selector, axis=axis),
            array_ops.concat([
                outer_dimensions, [num_frames, frame_length], inner_dimensions
            ], 0))

        if result_shape:
            frames.set_shape(result_shape)
        return frames
Example #3
def overlap_and_add(signal, frame_step, name=None):
    """Reconstructs a signal from a framed representation.

  Adds potentially overlapping frames of a signal with shape
  `[..., frames, frame_length]`, offsetting subsequent frames by `frame_step`.
  The resulting tensor has shape `[..., output_size]` where

      output_size = (frames - 1) * frame_step + frame_length
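
  For example, overlap-adding 3 frames of length 4 with a `frame_step` of 2
  yields a signal of length `(3 - 1) * 2 + 4 = 8`.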

  Args:
    signal: A `[..., frames, frame_length]` `Tensor`. All dimensions may be
      unknown, and rank must be at least 2.
    frame_step: An integer or scalar `Tensor` denoting overlap offsets. Must be
      less than or equal to `frame_length`.
    name: An optional name for the operation.

  Returns:
    A `Tensor` with shape `[..., output_size]` containing the overlap-added
    frames of `signal`'s inner-most two dimensions.

  Raises:
    ValueError: If `signal`'s rank is less than 2, `frame_step` is not a scalar
      integer or `frame_step` is greater than `frame_length`.
  """
    with ops.name_scope(name, "overlap_and_add", [signal, frame_step]):
        signal = ops.convert_to_tensor(signal, name="signal")
        signal.shape.with_rank_at_least(2)
        frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
        frame_step.shape.assert_has_rank(0)
        if not frame_step.dtype.is_integer:
            raise ValueError("frame_step must be an integer. Got %s" %
                             frame_step.dtype)

        signal_shape = array_ops.shape(signal)

        # All dimensions that are not part of the overlap-and-add. Can be empty for
        # rank 2 inputs.
        outer_dimensions = signal_shape[:-2]

        # If frame_length and frame_step are known at graph construction time, check
        # frame_step is less than or equal to frame_length.
        frame_step_static = tensor_util.constant_value(frame_step)
        if (frame_step_static is not None and signal.shape.ndims is not None
                and signal.shape.dims[-1].value is not None):
            if frame_step_static > signal.shape.dims[-1].value:
                raise ValueError(
                    "frame_step (%d) must be less than or equal to "
                    "frame_length (%d)" %
                    (frame_step_static, signal.shape.dims[-1].value))
            # If frame_length is equal to frame_step, there's no overlap so just
            # reshape the tensor.
            if frame_step_static == signal.shape.dims[-1].value:
                return array_ops.reshape(
                    signal, array_ops.concat([outer_dimensions, [-1]], 0))

        signal_rank = array_ops.rank(signal)
        frames = signal_shape[-2]
        frame_length = signal_shape[-1]

        subframe_length = util_ops.gcd(frame_length, frame_step)
        subframe_step = frame_step // subframe_length
        subframes_per_frame = frame_length // subframe_length
        output_size = frame_step * (frames - 1) + frame_length
        output_subframes = output_size // subframe_length
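        # Continuing the docstring example (3 frames of length 4, frame_step 2):
        # subframe_length = gcd(4, 2) = 2, subframe_step = 1,
        # subframes_per_frame = 2, output_size = 8, output_subframes = 4.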

        # To avoid overlap-adding sample-by-sample, we overlap-add at the "subframe"
        # level, where a subframe is gcd(frame_length, frame_step). Reshape signal
        # from [..., frames, frame_length] into [..., subframes, subframe_length].
        subframe_shape = array_ops.concat(
            [outer_dimensions, [-1, subframe_length]], 0)
        subframe_signal = array_ops.reshape(signal, subframe_shape)

        # Now we shuffle the last [subframes, subframe_length] dimensions to the
        # front.
        # TODO(rjryan): Add an axis argument to unsorted_segment_sum so we can
        # avoid this pair of transposes.
        subframe_signal = _shuffle_to_front(subframe_signal, 2)

        # Use unsorted_segment_sum to add overlapping subframes together.
        segment_ids = array_ops.reshape(
            shape_ops.frame(math_ops.range(output_subframes),
                            subframes_per_frame,
                            subframe_step,
                            pad_end=False), [-1])
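        # Continuing the example: frame(range(4), 2, 1) is
        # [[0, 1], [1, 2], [2, 3]], so segment_ids is [0, 1, 1, 2, 2, 3]: one
        # output-subframe id for each of the 3 * 2 input subframes.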
        result = math_ops.unsorted_segment_sum(subframe_signal,
                                               segment_ids,
                                               num_segments=output_subframes)

        # result is a [subframes, subframe_length, ...outer_dimensions] tensor. We
        # return a [...outer_dimensions, output_size] tensor with a transpose and
        # reshape.
        result_shape = array_ops.concat([outer_dimensions, [output_size]], 0)
        return array_ops.reshape(_shuffle_to_front(result, signal_rank - 2),
                                 result_shape)
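
As a sanity check, `frame` and `overlap_and_add` round-trip as expected: samples covered by two overlapping frames are summed twice. A minimal sketch using the public `tf.signal` API (assuming TensorFlow 2.x eager execution):

```python
import tensorflow as tf

signal = tf.range(8, dtype=tf.float32)          # [0. 1. 2. ... 7.]
frames = tf.signal.frame(signal, 4, 2)          # shape [3, 4]
rebuilt = tf.signal.overlap_and_add(frames, 2)  # shape [(3 - 1) * 2 + 4] = [8]
# Interior samples are covered by two frames, so they are doubled:
print(rebuilt.numpy())  # [ 0.  1.  4.  6.  8. 10.  6.  7.]
```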