def spectrogram_summary(name, audio, audio_gen, step):
    """Writes a spectrogram comparison summary for each example in a batch.

    For every batch element, a two-row figure is built: the log-magnitude
    spectrogram of the original audio on top and of the generated audio below.
    Each figure is handed to `fig_summary` under the tag
    'spectrogram/<name>_<sample index>'.

    Args:
        name: Prefix used when building the summary tag.
        audio: Batch of original audio; the first dimension is the batch.
        audio_gen: Batch of synthesized audio, indexed with the same sample
            indices as `audio`.
        step: Step value forwarded to `fig_summary`.
    """
    # Batch spectrogram operations.
    logmag_original = spectral_ops.compute_logmag(tf_float32(audio), size=768)
    logmag_generated = spectral_ops.compute_logmag(tf_float32(audio_gen), size=768)

    # (panel title, batched spectrograms), listed top row first.
    panels = (
        ('original', logmag_original),
        ('synthesized', logmag_generated),
    )

    n_samples = int(audio.shape[0])
    for idx in range(n_samples):
        # Manually specify exact size of fig for tensorboard.
        fig, axs = plt.subplots(2, 1, figsize=(8, 8))

        for panel_ax, (title, batch) in zip(axs, panels):
            image = np.rot90(batch[idx])
            panel_ax.matshow(
                image, vmin=-5, vmax=1, aspect='auto', cmap=plt.cm.magma)
            panel_ax.set_title(title)
            panel_ax.set_xticks([])
            panel_ax.set_yticks([])

        # Format and save plot to image.
        fig_summary('spectrogram/{}_{}'.format(name, idx), fig, step)
def specplot(audio, vmin=-5, vmax=1, rotate=True, size=512 + 256, **matshow_kwargs):
    """Plot the log magnitude spectrogram of audio.

    Args:
        audio: Audio to plot. If it has two dimensions it is treated as a
            batch and only the first element is plotted.
        vmin: Lower bound of the color scale passed to `plt.matshow`.
        vmax: Upper bound of the color scale passed to `plt.matshow`.
        rotate: If True, the spectrogram is rotated with `np.rot90` before
            plotting.
        size: Forwarded to `spectral_ops.compute_logmag` as `size`.
        **matshow_kwargs: Extra keyword arguments forwarded to `plt.matshow`.
    """
    # If batched, take first element.
    single = audio[0] if len(audio.shape) == 2 else audio

    spec = spectral_ops.compute_logmag(core.tf_float32(single), size=size)
    spec = np.rot90(spec) if rotate else spec

    # Plotting.
    plt.matshow(
        spec,
        vmin=vmin,
        vmax=vmax,
        cmap=plt.cm.magma,
        aspect='auto',
        **matshow_kwargs)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel('Time')
    plt.ylabel('Frequency')
def call(self, audio, *conditioning):
    """Encode audio, plus optional conditioning, into a pooled latent.

    Spectral features are computed from `audio` according to
    `self.spectral_op`, normalized, concatenated with the resampled
    conditioning signals, run through an RNN, and reduced over axis 1 with a
    learned confidence-weighted average.

    Args:
        audio: Input audio passed to the selected spectral op.
        *conditioning: Optional extra tensors. Each is resampled to the
            number of feature timesteps and concatenated on the last axis.

    Returns:
        The weighted average of the projected latents over axis 1, with that
        axis kept (size 1).

    Raises:
        ValueError: If `self.spectral_op` is neither 'compute_mfcc' nor
            'compute_logmag'.
    """
    if self.spectral_op == 'compute_mfcc':
        z = spectral_ops.compute_mfcc(
            audio,
            lo_hz=20.0,
            hi_hz=8000.0,
            fft_size=self.fft_size,
            mel_bins=128,
            mfcc_bins=30,
            overlap=self.overlap,
            pad_end=True)
    elif self.spectral_op == 'compute_logmag':
        z = spectral_ops.compute_logmag(core.tf_float32(audio), size=self.fft_size)
    else:
        # Without this branch `z` would be unbound and the failure would
        # surface later as an unrelated error.
        raise ValueError(
            "Unsupported spectral_op {!r}; expected 'compute_mfcc' or "
            "'compute_logmag'.".format(self.spectral_op))

    # Normalize. A singleton axis is inserted at position 2 for the norm
    # layer and removed again afterwards.
    z = self.z_norm(z[:, :, tf.newaxis, :])[:, :, 0, :]

    # Bring every conditioning signal to the feature frame count (axis 1)
    # so it can be concatenated along the last axis.
    n_timesteps = z.shape[1]
    conditioning = [resample(c, n_timesteps) for c in conditioning]
    z = tf.concat([z] + conditioning, axis=-1)

    # Run an RNN over the latents.
    z = self.rnn(z)

    # Bounce down to compressed z dimensions, pooling over axis 1 with
    # sigmoid confidence weights.
    w = tf.math.sigmoid(self.confidence(z))
    z = self.dense_out(z)
    z = tf.reduce_sum(z * w, axis=1, keepdims=True) / tf.reduce_sum(
        w, axis=1, keepdims=True)
    return z
def spectrogram(audio, sess=None, rotate=False, size=2048):
    """Compute logmag spectrogram.

    Args:
        audio: Audio data; converted to a float32 tensor before processing.
        sess: Optional `tf.Session` used to evaluate the spectrogram. If
            None, a temporary session is created for this call and closed
            before returning. A session passed in by the caller is left open.
        rotate: If True, the result is rotated with `np.rot90`.
        size: Forwarded to `spectral_ops.compute_logmag` as `size`.

    Returns:
        The evaluated log-magnitude spectrogram, rotated if requested.
    """
    # Only close a session this function created; a caller-provided session
    # remains the caller's responsibility.
    owns_session = sess is None
    if owns_session:
        sess = tf.Session()
    try:
        mag = sess.run(
            spectral_ops.compute_logmag(
                tf.convert_to_tensor(audio, tf.float32), size=size))
    finally:
        if owns_session:
            sess.close()

    if rotate:
        mag = np.rot90(mag)
    return mag
def get_spectrogram(audio, rotate=False, size=1024):
    """Compute the log-magnitude spectrogram of audio.

    Args:
        audio: Audio input; converted with `tf_float32` before processing.
        rotate: If True, the spectrogram is rotated with `np.rot90`.
        size: Forwarded to `spectral_ops.compute_logmag` as `size`.

    Returns:
        The log-magnitude spectrogram, rotated if requested.
    """
    logmag = spectral_ops.compute_logmag(tf_float32(audio), size=size)
    return np.rot90(logmag) if rotate else logmag