def _build_stft_feature(self):
    """ Compute STFT of waveform and slice the STFT in segment
    with the right length to feed the network.

    Results are cached in ``self._features``: the STFT under
    ``self.stft_name`` and the spectrogram under
    ``self.spectrogram_name``. Each of them is only computed when its key
    is not already present, so pre-populated features are left untouched.
    """
    stft_name = self.stft_name
    spec_name = self.spectrogram_name

    if stft_name not in self._features:
        # Pad input with a frame of zeros: ``_frame_length`` rows of zeros
        # are prepended along axis 0 of the 'waveform' feature.
        waveform = tf.concat(
            [
                tf.zeros((self._frame_length, self._n_channels)),
                self._features['waveform'],
            ],
            0,
        )
        stft_feature = tf.transpose(
            stft(
                tf.transpose(waveform),
                self._frame_length,
                self._frame_step,
                window_fn=lambda frame_length, dtype: (
                    hann_window(frame_length, periodic=True, dtype=dtype)),
                pad_end=True),
            perm=[1, 2, 0])
        # Store under the very key that the guard above tests and that the
        # spectrogram step below reads; rebuilding the key from the mix name
        # here would silently break the cache (and raise KeyError below) as
        # soon as the two spellings diverge.
        self._features[stft_name] = stft_feature

    if spec_name not in self._features:
        # Magnitude of the STFT partitioned by ``pad_and_partition`` with
        # ``_T``, keeping only the first ``_F`` entries of the third axis.
        self._features[spec_name] = tf.abs(
            pad_and_partition(self._features[stft_name], self._T)
        )[:, :, :self._F, :]
def compute_spectrogram_tf(
        waveform,
        frame_length=2048,
        frame_step=512,
        spec_exponent=1.,
        window_exponent=1.):
    """ Compute magnitude / power spectrogram from waveform as
    a n_samples x n_channels tensor.

    :param waveform:        Input waveform as (times x number of channels)
                            tensor.
    :param frame_length:    Length of a STFT frame to use.
    :param frame_step:      HOP between successive frames.
    :param spec_exponent:   Exponent of the spectrogram (usually 1 for
                            magnitude spectrogram, or 2 for power spectrogram).
    :param window_exponent: Exponent applied to the Hann windowing function
                            (may be useful for making perfect STFT/iSTFT
                            reconstruction).
    :returns:   Computed magnitude / power spectrogram as a
                (T x F x n_channels) tensor.
    """
    def _window(window_length, dtype):
        # Build the window in the dtype requested by ``stft`` rather than
        # reading ``waveform.dtype``: the result is the same for tensor and
        # ndarray inputs, and inputs without a ``dtype`` attribute (e.g.
        # nested Python lists) no longer fail here.
        window = hann_window(window_length, periodic=True, dtype=dtype)
        return window ** window_exponent

    stft_tensor = tf.transpose(
        stft(
            tf.transpose(waveform),
            frame_length,
            frame_step,
            window_fn=_window),
        perm=[1, 2, 0])
    return tf.abs(stft_tensor) ** spec_exponent
def _inverse_stft(self, stft):
    """ Invert the given STFT and crop the result to the input length.

    :param stft: STFT to invert; its axes are permuted with ``[2, 0, 1]``
                 before being handed to ``inverse_stft``.
    :returns: Inverse STFT scaled by ``WINDOW_COMPENSATION_FACTOR``,
              transposed, and truncated along its first axis to the length
              of the 'waveform' feature.
    """
    def _hann(frame_length, dtype):
        # Periodic Hann window in the dtype requested by the caller.
        return hann_window(frame_length, periodic=True, dtype=dtype)

    permuted = tf.transpose(stft, perm=[2, 0, 1])
    signal = inverse_stft(
        permuted,
        self._frame_length,
        self._frame_step,
        window_fn=_hann)
    compensated = signal * self.WINDOW_COMPENSATION_FACTOR
    waveform = tf.transpose(compensated)
    n_samples = tf.shape(self._features['waveform'])[0]
    return waveform[:n_samples, :]
def _build_stft_feature(self):
    """ Compute the STFT of the 'waveform' feature and its spectrogram.

    Two entries are written to ``self._features``:
    ``'<mix_name>_stft'`` holds the STFT with its axes permuted by
    ``[1, 2, 0]``, and ``'<mix_name>_spectrogram'`` holds the magnitude of
    that STFT after ``pad_and_partition`` with ``_T``, restricted to the
    first ``_F`` entries of the third axis.
    """
    def _hann(frame_length, dtype):
        # Periodic Hann window in the dtype requested by ``stft``.
        return hann_window(frame_length, periodic=True, dtype=dtype)

    transposed_waveform = tf.transpose(self._features['waveform'])
    raw_stft = stft(
        transposed_waveform,
        self._frame_length,
        self._frame_step,
        window_fn=_hann,
        pad_end=True)
    stft_feature = tf.transpose(raw_stft, perm=[1, 2, 0])
    self._features[f'{self._mix_name}_stft'] = stft_feature

    segments = pad_and_partition(stft_feature, self._T)
    magnitude = tf.abs(segments)
    self._features[f'{self._mix_name}_spectrogram'] = (
        magnitude[:, :, :self._F, :])
def _build_stft_feature(self):
    """ Build the STFT feature and the segmented spectrogram fed to the
    network.

    The STFT of the 'waveform' feature is stored in ``self._features``
    under ``'<mix_name>_stft'``. Its magnitude, partitioned by
    ``pad_and_partition`` with ``_T`` and truncated to the first ``_F``
    entries of the third axis, is stored under ``'<mix_name>_spectrogram'``.
    """
    def periodic_hann(frame_length, dtype):
        return hann_window(frame_length, periodic=True, dtype=dtype)

    # STFT is computed on the transposed waveform, then permuted back.
    spectrum = stft(
        tf.transpose(self._features['waveform']),
        self._frame_length,
        self._frame_step,
        window_fn=periodic_hann,
        pad_end=True)
    stft_feature = tf.transpose(spectrum, perm=[1, 2, 0])
    self._features[f'{self._mix_name}_stft'] = stft_feature

    # Partition into segments before taking the magnitude.
    partitioned = pad_and_partition(stft_feature, self._T)
    spectrogram = tf.abs(partitioned)[:, :, :self._F, :]
    self._features[f'{self._mix_name}_spectrogram'] = spectrogram
def _inverse_stft(self, stft_t, time_crop=None):
    """ Inverse and reshape the given STFT

    :param stft_t: input STFT
    :param time_crop: upper bound of the slice applied to the first axis of
                      the result; when None, the length of the 'waveform'
                      feature is used instead
    :returns: inverse STFT (waveform)
    """
    def _hann(frame_length, dtype):
        # Periodic Hann window in the dtype requested by ``inverse_stft``.
        return hann_window(frame_length, periodic=True, dtype=dtype)

    permuted = tf.transpose(stft_t, perm=[2, 0, 1])
    signal = inverse_stft(
        permuted,
        self._frame_length,
        self._frame_step,
        window_fn=_hann)
    compensated = signal * self.WINDOW_COMPENSATION_FACTOR
    reshaped = tf.transpose(compensated)

    if time_crop is not None:
        return reshaped[:time_crop, :]
    # Default crop: as many entries as the 'waveform' feature has.
    default_crop = tf.shape(self._features['waveform'])[0]
    return reshaped[:default_crop, :]
def _inverse_stft(self, stft_t, time_crop=None):
    """Inverse and reshape the given STFT.

    Arguments:
        stft_t -- input STFT; its axes are permuted with [2, 0, 1]
            before inversion.

    Keyword Arguments:
        time_crop -- upper bound of the slice applied to the first axis
            of the result; when None, the length of the 'waveform'
            feature is used (default: {None})

    Returns:
        Inverse STFT (waveform), scaled by WINDOW_COMPENSATION_FACTOR,
        transposed and cropped along its first axis.
    """
    def _periodic_hann(frame_length, dtype):
        return hann_window(frame_length, periodic=True, dtype=dtype)

    time_domain = inverse_stft(
        tf.transpose(stft_t, perm=[2, 0, 1]),
        self._frame_length,
        self._frame_step,
        window_fn=_periodic_hann)
    scaled = time_domain * self.WINDOW_COMPENSATION_FACTOR
    reshaped = tf.transpose(scaled)

    crop = time_crop
    if crop is None:
        # Fall back to the length of the 'waveform' feature.
        crop = tf.shape(self._features['waveform'])[0]
    return reshaped[:crop, :]
# Load the audio file; the second value returned by the loader is unused.
waveform, _ = audio_loader.load(filename, sample_rate=sample_rate)
print(waveform.dtype)
print("max amplitude: {}".format(np.max(np.abs(waveform))))

# Compute the STFT and the partitioned magnitude spectrogram.
print("compute stft")
frame_length = separator._params['frame_length']
frame_step = separator._params['frame_step']
T = separator._params['T']
F = separator._params['F']
# The ops must be created inside the predictor's graph so that its session
# can evaluate them.
with predictor.graph.as_default():
    stft_feature = tf.transpose(
        stft(tf.transpose(waveform),
             frame_length,
             frame_step,
             window_fn=lambda frame_length, dtype: (
                 hann_window(frame_length, periodic=True, dtype=dtype)),
             pad_end=True),
        perm=[1, 2, 0])
    spectrogram = tf.abs(pad_and_partition(stft_feature, T))[:, :, :F, :]
# Fetch both tensors in a single run: the spectrogram depends on the STFT,
# so two separate runs would evaluate the STFT subgraph twice.
stft_np, spectrogram_np = predictor.session.run([stft_feature, spectrogram])
print("yes stft")

# compute perturbation
with predictor.graph.as_default():
    print("build graph")
    instrnames = []
return total_parameters print('Setting up the tensorflow graph.') train_graph = tf.Graph() with train_graph.as_default(): global_step = tf.Variable(0, trainable=False, name='global_step') # We use c input windows to give the RNN access to context. x = tf.placeholder(tf.float32, shape=[batch_size, c, window_size]) # The ground-truth labelling is used during training, with random sampling # from the network output. y_gt = tf.placeholder(tf.float32, shape=[batch_size, c, m]) # Compute the FFT of the time-domain data. # xf = tf.spectral.fft(tf.complex(x, tf.zeros_like(x))) w = tfsignal.hann_window(window_size, periodic=True) xf = tf.spectral.rfft(x * w) dec_learning_rate = tf.train.exponential_decay(learning_rate, global_step, decay_iterations, learning_rate_decay, staircase=True) optimizer = tf.train.RMSPropOptimizer(dec_learning_rate) tf.summary.scalar('learning_rate', dec_learning_rate) if RNN: def define_bidirecitonal(RNN_in, cell_size, dense_size, stiefel,