def _streaming_internal_state(self, inputs):
    """Runs one streaming step using the remainder stored in `self.states`.

    The parent class `call` is applied to `inputs`. The remainder kept in
    `self.states` is added to the first time steps (axis 1) of the result,
    and the last `self.overlap` time steps become the new remainder.

    When `self.use_bias` is set, the bias is subtracted from the stored
    remainder: the output of the next step already contains the bias, so a
    remainder that still carried it would add the bias twice in the overlap
    region.

    Args:
      inputs: tensor forwarded to the parent class `call`.

    Returns:
      The step output wrapped in `tf.identity`; only the first
      `self.output_time_dim` steps along axis 1 when `self.crop_output` is
      true.
    """
    outputs = super(Conv1DTranspose, self).call(inputs)

    if self.overlap == 0:
        # Nothing spills into the next step: no state to read or update.
        if self.crop_output:
            return tf.identity(outputs[:, 0:self.output_time_dim, :])
        return tf.identity(outputs)

    # The remainder has to be added to the head of the output:
    #   outputs[:, 0:self.overlap, :] += self.states
    # 'Tensor' objects do not support item assignment, so the state is
    # extended with zeros along axis 1 and added to the whole output instead.
    output_shape = outputs.shape.as_list()
    output_shape[1] -= self.state_shape[1]
    padded_remainder = tf.concat(
        [self.states, tf.zeros(output_shape, tf.float32)], 1)
    outputs = outputs + padded_remainder

    # Keep only the convolution part in the remainder; the bias is added
    # again by the next call.
    new_state = outputs[:, -self.overlap:, :]
    if self.use_bias:
        new_state = new_state - self.bias

    assign_states = self.states.assign(new_state)
    # Make the returned tensor depend on the state update so that the
    # assignment is executed whenever the output is evaluated.
    with tf.control_dependencies([assign_states]):
        if self.crop_output:
            return tf.identity(outputs[:, 0:self.output_time_dim, :])
        return tf.identity(outputs)
def _streaming_internal_state(self, inputs):
    """Performs one streaming step with the remainder kept in `self.states`.

    Args:
      inputs: tensor forwarded to the parent class `call`.

    Returns:
      The step output wrapped in `tf.identity`. When `self.crop_output` is
      true only the first `self.output_time_dim` steps along axis 1 are
      returned.
    """
    outputs = super(Conv1DTranspose, self).call(inputs)

    if self.overlap == 0:
        # No overlap between consecutive steps, hence no state handling.
        if not self.crop_output:
            return tf.identity(outputs)
        return tf.identity(outputs[:, 0:self.output_time_dim, :])

    # Tensors do not support item assignment, so instead of
    #   outputs[:, 0:self.overlap, :] += self.states
    # the state is zero-extended along axis 1 and summed with the output.
    zeros_shape = outputs.shape.as_list()
    zeros_shape[1] = zeros_shape[1] - self.state_shape[1]
    zero_tail = tf.zeros(zeros_shape, tf.float32)
    outputs = outputs + tf.concat([self.states, zero_tail], 1)

    # Extract the remainder and subtract the bias if it is used: the bias is
    # added again in the next iteration, so the remainder must hold only the
    # convolution part, otherwise the bias would be added twice.
    remainder = outputs[:, -self.overlap:, :]
    new_state = remainder - self.bias if self.use_bias else remainder

    state_update = self.states.assign(new_state)
    with tf.control_dependencies([state_update]):
        if not self.crop_output:
            return tf.identity(outputs)
        return tf.identity(outputs[:, 0:self.output_time_dim, :])
def _streaming_external_state(self, inputs, state):
    """Runs streaming inverse STFT with the overlap state passed explicitly.

    Args:
      inputs: frames given to `tf.signal.inverse_stft`.
      state: samples carried over from the previous call; they are added to
        the first `self.frame_size` reconstructed samples. `None` is
        replaced by an empty list.

    Returns:
      Tuple `(samples, new_state)`. `samples` holds the first
      `self.frame_step` samples in one-step mode and the first
      `self.frame_step * self.input_frames` samples otherwise. `new_state`
      holds the last `self.frame_size` samples after the buffer has been
      extended by `self.frame_step` zeros. When frames do not overlap
      (`self.frame_size - self.frame_step <= 0`) the reconstructed samples
      and the unchanged `state` are returned.
    """
    state = [] if state is None else state

    # compute inversed FT of any number of input frames
    inversed_frame = tf.signal.inverse_stft(
        inputs,
        self.frame_size,
        self.frame_step,
        self.fft_size,
        window_fn=self.window_fn)
    inversed_frame = tf.cast(inversed_frame, tf.float32)

    # if there is no overlap between frames then
    # there is no need in streaming state processing
    if self.frame_size - self.frame_step <= 0:
        return inversed_frame, state

    # Both modes start by adding the carried-over samples to the first frame.
    new_frame_state = state + inversed_frame[:, 0:self.frame_size]
    if self.use_one_step:
        # streaming with input frame by frame
        output_size = self.frame_step
    else:
        # streaming with several input frames
        new_frame_state = tf.concat(
            [new_frame_state, inversed_frame[:, self.frame_size:]], axis=1)
        output_size = self.frame_step * self.input_frames

    # get output hop(s) before frame shifting
    inversed_frames = new_frame_state[:, 0:output_size]

    # shift frame samples by frame_step to the left: ring buffer.
    # tf.pad appends the zeros for whatever batch size the buffer has,
    # instead of concatenating a zeros tensor with a hard-coded batch of 1.
    new_frame_state = tf.pad(new_frame_state, [[0, 0], [0, self.frame_step]])
    new_frame_state = new_frame_state[:, -self.frame_size:]

    return inversed_frames, new_frame_state
def _streaming_external_state(self, inputs, state):
    """Runs one streaming step with the state supplied by the caller.

    Args:
      inputs: tensor for the current step.
      state: state from the previous step; `None` is replaced by an empty
        list.

    Returns:
      Tuple `(output, new_state)`.

    Raises:
      ValueError: in one-step mode, if `inputs.shape[1]` is not 1.
    """
    if state is None:
        state = []

    if not isinstance(self.get_core_layer(), tf.keras.layers.Conv2DTranspose):
        if self.use_one_step:
            # The time dimension always has to equal 1 in streaming mode.
            if inputs.shape[1] != 1:
                raise ValueError(
                    'inputs.shape[1]: %d must be 1 ' % inputs.shape[1])

            # drop the first row of the buffer:
            # [batch_size, (memory_size-1), feature_dim, channel]
            memory = state[:, 1:self.ring_buffer_size_in_time_dim, :]
            # append the new row:
            # [batch_size, memory_size, feature_dim, channel]
            memory = tf.keras.backend.concatenate([memory, inputs], 1)
            return self.cell(memory), memory

        # append the new rows and keep the last rows as the next state
        memory = tf.keras.backend.concatenate([state, inputs], 1)
        state_update = memory[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type
        return self.cell(memory), state_update

    # Transposed convolution: the state is the remainder of the previous
    # output that overlaps with the head of the current one.
    outputs = self.cell(inputs)

    if self.ring_buffer_size_in_time_dim == 0:
        if self.transposed_conv_crop_output:
            outputs = outputs[:, 0:self.output_time_dim, :]
        return outputs, []

    # Zero-extend the state along axis 1 and add it to the output.
    zeros_shape = outputs.shape.as_list()
    zeros_shape[1] = zeros_shape[1] - self.state_shape[1]
    zero_tail = tf.zeros(zeros_shape, tf.float32)
    outputs = outputs + tf.concat([state, zero_tail], 1)

    new_state = outputs[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type
    if self.get_core_layer().get_config()['use_bias']:
        # need to access bias of the cell layer,
        # where cell can be wrapped by wrapper layer
        new_state = new_state - self.get_core_layer().bias

    if self.transposed_conv_crop_output:
        outputs = outputs[:, 0:self.output_time_dim, :]
    return outputs, new_state
def _streaming_external_state(self, inputs, states):
    """Runs one streaming step with the remainder state passed explicitly.

    The parent class `call` is applied to `inputs`. `states` is added to the
    first time steps (axis 1) of the result, and the last `self.overlap`
    time steps are returned as the new remainder.

    When `self.use_bias` is set, the bias is subtracted from the returned
    remainder: the output of the next step already contains the bias, so a
    remainder that still carried it would add the bias twice in the overlap
    region.

    Args:
      inputs: tensor forwarded to the parent class `call`.
      states: remainder produced by the previous step; not read when
        `self.overlap` is 0.

    Returns:
      Tuple `(outputs, new_state)`. `outputs` holds only the first
      `self.output_time_dim` steps along axis 1 when `self.crop_output` is
      true. `new_state` is an empty list when `self.overlap` is 0.
    """
    outputs = super(Conv1DTranspose, self).call(inputs)

    if self.overlap == 0:
        # Nothing spills into the next step: there is no state to return.
        if self.crop_output:
            return outputs[:, 0:self.output_time_dim, :], []
        return outputs, []

    # 'Tensor' objects do not support item assignment, so the state is
    # extended with zeros along axis 1 and added to the whole output.
    output_shape = outputs.shape.as_list()
    output_shape[1] -= self.state_shape[1]
    padded_remainder = tf.concat(
        [states, tf.zeros(output_shape, tf.float32)], 1)
    outputs = outputs + padded_remainder

    # Keep only the convolution part in the remainder; the bias is added
    # again by the next call.
    new_state = outputs[:, -self.overlap:, :]
    if self.use_bias:
        new_state = new_state - self.bias

    if self.crop_output:
        return outputs[:, 0:self.output_time_dim, :], new_state
    return outputs, new_state
def spectrogram_masking(spectrogram, dim=1, masks_number=2, mask_max_size=5):
    """Zeroes random stripes of a spectrogram along time or frequency.

    For every mask a stripe size is drawn from `[0, mask_max_size)` and an
    offset from `[0, dim_size - size)`. The mask is built along `dim` as
    ones of length `dim_size - offset - size`, zeros of length `size` and
    ones of length `offset`, and is multiplied into the spectrogram.

    Args:
      spectrogram: input spectrum [batch, time, frequency]
      dim: dimension on which masking is applied: 1 - time; 2 - frequency
      masks_number: number of masks
      mask_max_size: exclusive upper bound of the mask size

    Returns:
      masked spectrogram

    Raises:
      ValueError: if `dim` is neither 1 nor 2.
    """
    if dim != 1 and dim != 2:
        raise ValueError('Wrong dim value: %d' % dim)

    shape = spectrogram.shape
    time_size, frequency_size = shape[1:3]
    masked_dim_size = shape[dim]  # size of the dimension being masked
    # The mask has batch size 1 and is broadcast over the batch.
    base_shape = [1, time_size, frequency_size]

    def stripe(fill_fn, length):
        """Builds a stripe of `base_shape` with `dim` resized to `length`."""
        stripe_shape = list(base_shape)
        stripe_shape[dim] = length
        return fill_fn(stripe_shape, spectrogram.dtype)

    for _ in range(masks_number):
        size = tf.random.uniform([], 0, mask_max_size, tf.int32)
        offset = tf.random.uniform([], 0, masked_dim_size - size, tf.int32)

        mask = tf.concat(
            (
                stripe(tf.ones, masked_dim_size - offset - size),
                stripe(tf.zeros, size),
                stripe(tf.ones, offset),
            ),
            dim,
        )
        spectrogram = spectrogram * mask

    return spectrogram
def _streaming_internal_state(self, inputs):
    """Runs one streaming step with the state kept in `self.states`.

    Args:
      inputs: tensor for the current step.

    Returns:
      Output of `self.cell` for this step. In the transposed-convolution
      case with a non-empty buffer, the previous remainder is added to it
      and it is cropped to `self.output_time_dim` steps along axis 1 when
      `self.transposed_conv_crop_output` is true.

    Raises:
      ValueError: in one-step mode, if `inputs.shape[1]` is not 1.
    """
    is_transposed = isinstance(
        self.get_core_layer(), tf.keras.layers.Conv2DTranspose)

    if not is_transposed:
        if self.use_one_step:
            # The time dimension always has to equal 1 in streaming mode.
            if inputs.shape[1] != 1:
                raise ValueError(
                    'inputs.shape[1]: %d must be 1 ' % inputs.shape[1])

            # drop the first row of the buffer:
            # [batch_size, (memory_size-1), feature_dim, channel]
            memory = self.states[:, 1:self.ring_buffer_size_in_time_dim, :]
            # append the new row:
            # [batch_size, memory_size, feature_dim, channel]
            memory = tf.keras.backend.concatenate([memory, inputs], 1)
            state_update = self.states.assign(memory)
            with tf.control_dependencies([state_update]):
                return self.cell(memory)

        if not self.ring_buffer_size_in_time_dim:
            # No buffer to maintain: process the input as is.
            return self.cell(inputs)

        # append the new rows and keep the last rows as the next state
        memory = tf.keras.backend.concatenate([self.states, inputs], 1)
        latest_rows = memory[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type
        state_update = self.states.assign(latest_rows)
        with tf.control_dependencies([state_update]):
            return self.cell(memory)

    # Transposed convolution: the state is the remainder of the previous
    # output that overlaps with the head of the current one.
    outputs = self.cell(inputs)

    if self.ring_buffer_size_in_time_dim == 0:
        if self.transposed_conv_crop_output:
            outputs = outputs[:, 0:self.output_time_dim]
        return outputs

    # The remainder has to be added to the head of the output:
    #   outputs[:, 0:self.ring_buffer_size_in_time_dim, :] += self.states
    # 'Tensor' objects do not support item assignment, so the state is
    # zero-extended along axis 1 and added to the whole output instead.
    zeros_shape = outputs.shape.as_list()
    zeros_shape[1] = zeros_shape[1] - self.state_shape[1]
    zero_tail = tf.zeros(zeros_shape, tf.float32)
    outputs = outputs + tf.concat([self.states, zero_tail], 1)

    # Extract the remainder and subtract the bias if it is used: the bias is
    # added again in the next iteration, so the remainder must hold only the
    # convolution part, otherwise the bias would be added twice.
    new_state = outputs[:, -self.ring_buffer_size_in_time_dim:, :]  # pylint: disable=invalid-unary-operand-type
    if self.get_core_layer().get_config()['use_bias']:
        # need to access bias of the cell layer,
        # where cell can be wrapped by wrapper layer
        new_state = new_state - self.get_core_layer().bias

    state_update = self.states.assign(new_state)
    with tf.control_dependencies([state_update]):
        if not self.transposed_conv_crop_output:
            return tf.identity(outputs)
        return tf.identity(outputs[:, 0:self.output_time_dim, :])
def random_cutout(
    inputs,
    mask_size,
    mask_value=0,
    seed=None,
    data_format='channels_last',
):
    """Applies cutout (https://arxiv.org/abs/1708.04552) to inputs.

    It is based on addons/tensorflow_addons/image/cutout_ops.py
    and is kept here for backward compatibility.

    A cutout center is drawn independently for every batch element. The
    mask size is halved with integer division and the cutout extends that
    many positions to each side of the center, clipped at the borders.

    Args:
      inputs: input tensor [batch_size, time, feature, channels]; for any
        other `data_format` time and feature are read from axes 2 and 3
      mask_size: mask size (time, feature); a scalar is used for both
      mask_value: mask will be filled with this value
      seed: random seed
      data_format: dimensions order

    Returns:
      masked inputs

    Raises:
      ValueError: if inputs.shape.rank != 4
    """
    if inputs.shape.rank != 4:
        raise ValueError('inputs.shape.rank:%d must be 4' % inputs.shape.rank)

    mask_size = tf.convert_to_tensor(mask_size)
    if tf.rank(mask_size) == 0:
        mask_size = tf.stack([mask_size, mask_size])

    channels_last = data_format == 'channels_last'
    time_axis, feature_axis = (1, 2) if channels_last else (2, 3)
    time_size = tf.shape(inputs)[time_axis]
    feature_size = tf.shape(inputs)[feature_axis]
    batch_size = tf.shape(inputs)[0]

    # One random center per batch element (time is drawn first).
    center_time = tf.random.uniform(
        shape=[batch_size],
        minval=0,
        maxval=time_size,
        dtype=tf.int32,
        seed=seed,
    )
    center_feature = tf.random.uniform(
        shape=[batch_size],
        minval=0,
        maxval=feature_size,
        dtype=tf.int32,
        seed=seed,
    )
    centers = tf.transpose([center_time, center_feature], [1, 0])
    static_shape = inputs.shape

    centers = tf.convert_to_tensor(centers)
    half_size = mask_size // 2
    center_time = centers[:, 0]
    center_feature = centers[:, 1]

    # Amount of untouched positions on each side of the cutout.
    pad_before_time = tf.maximum(0, center_time - half_size[0])
    pad_after_time = tf.maximum(0, time_size - center_time - half_size[0])
    pad_before_feature = tf.maximum(0, center_feature - half_size[1])
    pad_after_feature = tf.maximum(
        0, feature_size - center_feature - half_size[1])

    # What is left between the pads is the cutout itself: [batch, 2].
    cutout_shape = tf.transpose(
        [
            time_size - (pad_before_time + pad_after_time),
            feature_size - (pad_before_feature + pad_after_feature),
        ],
        [1, 0],
    )

    # Per-example mask: zeros inside the cutout, padded with ones around it.
    masks = tf.TensorArray(dtype=inputs.dtype, size=0, dynamic_size=True)
    for i in tf.range(tf.shape(cutout_shape)[0]):
        paddings = [
            [pad_before_time[i], pad_after_time[i]],
            [pad_before_feature[i], pad_after_feature[i]],
        ]
        example_mask = tf.pad(
            tf.zeros(cutout_shape[i], dtype=inputs.dtype),
            paddings,
            constant_values=1,
        )
        masks = masks.write(i, example_mask)

    # Replicate the [batch, time, feature] mask over the channel axis.
    if channels_last:
        mask = tf.expand_dims(masks.stack(), -1)
        mask = tf.tile(mask, [1, 1, 1, tf.shape(inputs)[-1]])
    else:
        mask = tf.expand_dims(masks.stack(), 1)
        mask = tf.tile(mask, [1, tf.shape(inputs)[1], 1, 1])

    fill = tf.ones_like(inputs, dtype=inputs.dtype) * mask_value
    inputs = tf.where(tf.equal(mask, 0), fill, inputs)
    # Restore the static shape captured before masking.
    inputs.set_shape(static_shape)
    return inputs