Exemplo n.º 1
0
  def _streaming_internal_state(self, inputs):
    """Runs one streaming step, keeping the overlap remainder in `self.states`.

    The tail of the current output (last `self.overlap` samples in the time
    dimension) is stored in `self.states` and added to the head of the output
    produced by the next call.

    Args:
      inputs: input tensor, passed as is to the parent layer's `call`.

    Returns:
      Output tensor; cropped to the first `self.output_time_dim` samples of
      the time dimension when `self.crop_output` is set.
    """
    outputs = super(Conv1DTranspose, self).call(inputs)

    if self.overlap == 0:
      # nothing overlaps between consecutive calls, so no state is needed
      if self.crop_output:
        return tf.identity(outputs[:, 0:self.output_time_dim, :])
      else:
        return tf.identity(outputs)

    output_shape = outputs.shape.as_list()

    # need to add remainder state to a specific region of output as below:
    # outputs[:,0:self.overlap,:] = outputs[:,0:self.overlap,:] + self.states
    # but 'Tensor' object does not support item assignment,
    # so doing it through full summation below
    output_shape[1] -= self.state_shape[1]
    padded_remainder = tf.concat(
        [self.states, tf.zeros(output_shape, tf.float32)], 1)
    outputs = outputs + padded_remainder

    # extract remainder state and subtract bias if it is used:
    # the parent call adds the bias on every step, so the stored remainder
    # must hold only the convolution part, otherwise the overlapping region
    # would receive the bias twice
    # NOTE(review): relies on the Keras layer attributes `use_bias`/`bias`
    # of the parent class - confirm the parent is a Keras conv layer
    if self.use_bias:
      new_state = outputs[:, -self.overlap:, :] - self.bias
    else:
      new_state = outputs[:, -self.overlap:, :]
    assign_states = self.states.assign(new_state)

    # make the returned tensor depend on the state update
    with tf.control_dependencies([assign_states]):
      if self.crop_output:
        return tf.identity(outputs[:, 0:self.output_time_dim, :])
      else:
        return tf.identity(outputs)
Exemplo n.º 2
0
  def _streaming_internal_state(self, inputs):
    """Runs one streaming step with the remainder kept in `self.states`.

    Args:
      inputs: input tensor, forwarded to the parent layer's `call`.

    Returns:
      Output tensor with the stored remainder added to its head; cropped to
      the first `self.output_time_dim` samples of the time dimension when
      `self.crop_output` is set.
    """
    conv_out = super(Conv1DTranspose, self).call(inputs)

    if self.overlap == 0:
      # consecutive outputs do not overlap: no state to read or update
      if not self.crop_output:
        return tf.identity(conv_out)
      return tf.identity(conv_out[:, 0:self.output_time_dim, :])

    # Tensors do not support item assignment, so instead of updating
    # conv_out[:, 0:overlap, :] in place, the stored remainder is padded
    # with zeros up to the output length and the two tensors are summed.
    zeros_shape = conv_out.shape.as_list()
    zeros_shape[1] -= self.state_shape[1]
    zero_tail = tf.zeros(zeros_shape, tf.float32)
    conv_out = conv_out + tf.concat([self.states, zero_tail], 1)

    # The bias is added again on the next step, so the remainder keeps only
    # the convolution part to avoid adding the bias twice.
    remainder = conv_out[:, -self.overlap:, :]
    if self.use_bias:
      remainder = remainder - self.bias
    update_op = self.states.assign(remainder)

    # the returned tensor depends on the state update
    with tf.control_dependencies([update_op]):
      if not self.crop_output:
        return tf.identity(conv_out)
      return tf.identity(conv_out[:, 0:self.output_time_dim, :])
Exemplo n.º 3
0
    def _streaming_external_state(self, inputs, state):
        """Runs inverse STFT in streaming mode with an externally held state.

        Args:
            inputs: input passed to `tf.signal.inverse_stft`.
            state: state from the previous call; it is added to the first
                `self.frame_size` samples of the inverse transform. `None`
                is replaced by an empty list.

        Returns:
            Tuple `(output, new_state)`. When `self.frame_size` does not
            exceed `self.frame_step` the inverse transform and the incoming
            state are returned unchanged.
        """
        if state is None:
            state = []

        # inverse FT of any number of input frames
        signal = tf.signal.inverse_stft(
            inputs,
            self.frame_size,
            self.frame_step,
            self.fft_size,
            window_fn=self.window_fn)
        signal = tf.cast(signal, tf.float32)

        # frames do not overlap: no streaming state processing is required
        if self.frame_size <= self.frame_step:
            return signal, state

        # overlap-add of the previous state with the first frame
        accumulated = state + signal[:, 0:self.frame_size]

        if self.use_one_step:
            # streaming frame by frame: one hop is emitted per call
            output_length = self.frame_step
        else:
            # streaming with several input frames: keep the samples that
            # follow the first frame and emit one hop per input frame
            accumulated = tf.concat(
                [accumulated, signal[:, self.frame_size:]], axis=1)
            output_length = self.frame_step * self.input_frames

        # output hops are taken before the buffer is shifted
        inversed_frames = accumulated[:, 0:output_length]

        # ring buffer: shift samples to the left by appending frame_step
        # zeros and keeping the last frame_size samples
        shifted = tf.concat(
            [accumulated, tf.zeros([1, self.frame_step])], axis=1)
        new_frame_state = shifted[:, -self.frame_size:]

        return inversed_frames, new_frame_state
Exemplo n.º 4
0
    def _streaming_external_state(self, inputs, state):
        """Runs one streaming step with the state supplied by the caller.

        Args:
            inputs: input tensor for the wrapped cell.
            state: state from the previous call; `None` is replaced by an
                empty list.

        Returns:
            Tuple `(output, new_state)`.

        Raises:
            ValueError: if `self.use_one_step` is set for a non-transposed
                core layer and `inputs.shape[1]` is not 1.
        """
        if state is None:
            state = []

        if not isinstance(self.get_core_layer(),
                          tf.keras.layers.Conv2DTranspose):
            if not self.use_one_step:
                # append new rows to the stored ones and run the cell on all
                # of them; the last ring_buffer_size_in_time_dim rows become
                # the next state
                memory = tf.keras.backend.concatenate([state, inputs], 1)
                next_state = memory[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type
                return self.cell(memory), next_state

            # In this mode the time dimension has to be equal to 1.
            if inputs.shape[1] != 1:
                raise ValueError('inputs.shape[1]: %d must be 1 ' %
                                 inputs.shape[1])

            # drop the first stored row and append the new one at the end
            kept_rows = state[:, 1:self.ring_buffer_size_in_time_dim, :]
            memory = tf.keras.backend.concatenate([kept_rows, inputs], 1)
            return self.cell(memory), memory

        # transposed convolution
        outputs = self.cell(inputs)

        if self.ring_buffer_size_in_time_dim == 0:
            # no remainder is carried between calls
            if self.transposed_conv_crop_output:
                outputs = outputs[:, 0:self.output_time_dim, :]
            return outputs, []

        # pad the incoming state with zeros up to the output length and add
        # it to the output
        zeros_shape = outputs.shape.as_list()
        zeros_shape[1] -= self.state_shape[1]
        outputs = outputs + tf.concat(
            [state, tf.zeros(zeros_shape, tf.float32)], 1)

        new_state = outputs[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type
        if self.get_core_layer().get_config()['use_bias']:
            # the bias belongs to the core layer, which can be wrapped by
            # a wrapper layer; it is removed from the returned state
            new_state = new_state - self.get_core_layer().bias

        if self.transposed_conv_crop_output:
            outputs = outputs[:, 0:self.output_time_dim, :]
        return outputs, new_state
Exemplo n.º 5
0
  def _streaming_external_state(self, inputs, states):
    """Runs one streaming step with the remainder state supplied by caller.

    Args:
      inputs: input tensor, passed as is to the parent layer's `call`.
      states: remainder produced by the previous call; it is added to the
        head of the current output.

    Returns:
      Tuple `(outputs, new_state)`. `outputs` is cropped to the first
      `self.output_time_dim` samples of the time dimension when
      `self.crop_output` is set. `new_state` is an empty list when
      `self.overlap` is 0.
    """
    outputs = super(Conv1DTranspose, self).call(inputs)

    if self.overlap == 0:
      # nothing overlaps between consecutive calls, so no state is needed
      if self.crop_output:
        return outputs[:, 0:self.output_time_dim, :], []
      else:
        return outputs, []

    output_shape = outputs.shape.as_list()

    # 'Tensor' object does not support item assignment, so the state is
    # padded with zeros up to the output length and added as a whole
    output_shape[1] -= self.state_shape[1]
    padded_remainder = tf.concat(
        [states, tf.zeros(output_shape, tf.float32)], 1)
    outputs = outputs + padded_remainder

    # extract remainder state and subtract bias if it is used:
    # the parent call adds the bias on every step, so the returned remainder
    # must hold only the convolution part, otherwise the overlapping region
    # would receive the bias twice
    # NOTE(review): relies on the Keras layer attributes `use_bias`/`bias`
    # of the parent class - confirm the parent is a Keras conv layer
    if self.use_bias:
      new_state = outputs[:, -self.overlap:, :] - self.bias
    else:
      new_state = outputs[:, -self.overlap:, :]

    if self.crop_output:
      return outputs[:, 0:self.output_time_dim, :], new_state
    else:
      return outputs, new_state
def spectrogram_masking(spectrogram, dim=1, masks_number=2, mask_max_size=5):
  """Zeroes out random stripes of a spectrogram along time or frequency.

  Args:
    spectrogram: Input spectrum [batch, time, frequency]
    dim: dimension on which masking will be applied: 1 - time; 2 - frequency
    masks_number: number of masks
    mask_max_size: upper bound (exclusive) of the random mask size
  Returns:
    masked spectrogram
  Raises:
    ValueError: if dim is neither 1 nor 2
  """
  if dim != 1 and dim != 2:
    raise ValueError('Wrong dim value: %d' % dim)

  shape = spectrogram.shape
  time_size, frequency_size = shape[1:3]
  dim_size = shape[dim]  # size of the masked dimension

  def stripe_shape(length):
    # stripe covering one batch entry, with `length` along the masked dim
    dims = [1, time_size, frequency_size]
    dims[dim] = length
    return dims

  masked = spectrogram
  for _ in range(masks_number):
    mask_width = tf.random.uniform([], 0, mask_max_size, tf.int32)
    mask_offset = tf.random.uniform([], 0, dim_size - mask_width, tf.int32)

    # ones | zeros | ones stripes concatenated along the masked dimension
    keep_first = tf.ones(
        stripe_shape(dim_size - mask_offset - mask_width), masked.dtype)
    dropped = tf.zeros(stripe_shape(mask_width), masked.dtype)
    keep_last = tf.ones(stripe_shape(mask_offset), masked.dtype)

    masked = masked * tf.concat((keep_first, dropped, keep_last), dim)
  return masked
Exemplo n.º 7
0
    def call(self, inputs, training=None):
        """Applies preemphasis along the last axis of `inputs`.

        The first element of the last axis is scaled by `1 - self.preemph`;
        every other element has `self.preemph` times its predecessor
        subtracted from it.

        Args:
            inputs: input tensor; the last axis is the frame with features.
            training: not used by this method.

        Returns:
            Tensor produced by concatenating the two parts along the last
            axis.
        """
        last_axis = inputs.shape.rank - 1

        # full slices for every axis in front of the frame axis
        leading = (slice(None),) * last_axis

        # first frame element
        head = inputs[leading + (slice(0, 1),)]
        # rightmost frame_size-1 elements
        current = inputs[leading + (slice(1, None),)]
        # leftmost frame_size-1 elements
        previous = inputs[leading + (slice(0, -1),)]

        return tf.concat(
            (head * (1 - self.preemph), current - self.preemph * previous),
            axis=last_axis)
Exemplo n.º 8
0
  def _streaming_internal_state(self, inputs):
    """Runs one streaming step with the state kept in `self.states`.

    Three paths are handled:
      * core layer is `Conv2DTranspose`: the stored remainder is added to the
        head of the cell output and the tail of the result becomes the new
        state (without the bias, if the core layer uses one);
      * `self.use_one_step`: the first stored row is dropped, `inputs` is
        appended and the cell runs on the updated buffer;
      * otherwise: `inputs` is appended to the stored rows, the cell runs on
        the concatenation and the last `ring_buffer_size_in_time_dim` rows
        are stored (no state is used when that size is 0).

    Args:
      inputs: input tensor for the wrapped cell.

    Returns:
      Output tensor of the cell; on stateful paths it is produced under a
      control dependency on the state assignment.

    Raises:
      ValueError: if `self.use_one_step` is set for a non-transposed core
        layer and `inputs.shape[1]` is not 1.
    """
    if isinstance(self.get_core_layer(), tf.keras.layers.Conv2DTranspose):
      outputs = self.cell(inputs)

      # no remainder is carried between calls
      if self.ring_buffer_size_in_time_dim == 0:
        if self.transposed_conv_crop_output:
          outputs = outputs[:, 0:self.output_time_dim]
        return outputs

      output_shape = outputs.shape.as_list()

      # need to add remainder state to a specific region of output as below:
      # outputs[:,0:self.ring_buffer_size_in_time_dim,:] =
      # outputs[:,0:self.ring_buffer_size_in_time_dim,:] + self.states
      # but 'Tensor' object does not support item assignment,
      # so doing it through full summation below
      output_shape[1] -= self.state_shape[1]
      padded_remainder = tf.concat(
          [self.states, tf.zeros(output_shape, tf.float32)], 1)
      outputs = outputs + padded_remainder

      # extract remainder state and subtract bias if it is used:
      # bias will be added in the next iteration again and remainder
      # should have only convolution part, so that bias is not added twice
      if self.get_core_layer().get_config()['use_bias']:
        # need to access bias of the cell layer,
        # where cell can be wrapped by wrapper layer
        bias = self.get_core_layer().bias
        new_state = outputs[:, -self.ring_buffer_size_in_time_dim:, :] - bias  # pylint: disable=invalid-unary-operand-type
      else:
        new_state = outputs[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type
      assign_states = self.states.assign(new_state)

      # the returned tensor depends on the state update
      with tf.control_dependencies([assign_states]):
        if self.transposed_conv_crop_output:
          return tf.identity(outputs[:, 0:self.output_time_dim, :])
        else:
          return tf.identity(outputs)
    else:
      if self.use_one_step:
        # The time dimension always has to equal 1 in streaming mode.
        if inputs.shape[1] != 1:
          raise ValueError('inputs.shape[1]: %d must be 1 ' % inputs.shape[1])

        # drop the first stored row: [batch_size, (memory_size-1), feature_dim, channel]
        memory = self.states[:, 1:self.ring_buffer_size_in_time_dim, :]

        # append the new row at the end: [batch_size, memory_size, feature_dim, channel]
        memory = tf.keras.backend.concatenate([memory, inputs], 1)

        assign_states = self.states.assign(memory)

        # the cell is called under a control dependency on the state update
        with tf.control_dependencies([assign_states]):
          return self.cell(memory)
      else:
        # append new rows to the stored ones; skipped when the ring buffer
        # size is 0 (falsy), in which case the cell runs on inputs directly
        if self.ring_buffer_size_in_time_dim:
          memory = tf.keras.backend.concatenate([self.states, inputs], 1)

          # the last ring_buffer_size_in_time_dim rows become the new state
          state_update = memory[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type

          assign_states = self.states.assign(state_update)

          with tf.control_dependencies([assign_states]):
            return self.cell(memory)
        else:
          return self.cell(inputs)