def test_oscillator_bank_is_accurate(self, batch_size, fundamental_frequency,
                                     n_harmonics, sample_rate, seconds):
    """Compare core.oscillator_bank with the numpy reference waveform.

    Constant frequency and amplitude envelopes for a stack of harmonics are
    synthesized once with `create_wave_np` and once with
    `core.oscillator_bank`; the two results are asserted to be close. The
    inputs come from the parameterized test cases.

    Args:
        batch_size: Size of the batch to synthesize.
        fundamental_frequency: Base frequency of the oscillator in Hertz.
        n_harmonics: Number of harmonics to synthesize.
        sample_rate: Sample rate of synthesis in samples per second.
        seconds: Requested length of the generated test sample in seconds.
    """
    n_samples = int(sample_rate * seconds)
    # Snap the duration to the whole number of samples actually generated.
    seconds = float(n_samples) / sample_rate

    harmonic_numbers = np.arange(1, n_harmonics + 1)
    frequencies = fundamental_frequency * harmonic_numbers
    amplitudes = 1.0 / n_harmonics * np.ones_like(frequencies)

    # Broadcast the per-harmonic values to [batch_size, n_samples, n_harmonics]
    # envelopes for the tf function.
    envelope_shape = [batch_size, n_samples, n_harmonics]
    frequency_envelopes = (
        np.ones(envelope_shape) * frequencies[np.newaxis, np.newaxis, :])
    amplitude_envelopes = (
        np.ones(envelope_shape) * amplitudes[np.newaxis, np.newaxis, :])

    # Reference signal (numpy) and signal under test (tensorflow).
    expected = create_wave_np(batch_size, frequency_envelopes,
                              amplitude_envelopes, seconds, n_samples)
    actual = core.oscillator_bank(frequency_envelopes,
                                  amplitude_envelopes,
                                  sample_rate=sample_rate)

    # Ignore edge effects.
    # NOTE(review): this slice is applied to the leading axis of both arrays,
    # which presumably is the batch axis rather than the sample axis; for
    # batch sizes below 2 * pad the compared slices would be empty. Confirm
    # whether `[:, pad:-pad]` was intended before tightening the check.
    pad = 10
    trimmed = slice(pad, -pad)
    self.assertAllClose(expected[trimmed], actual[trimmed])
def call(self, conditioning):
    """Run the conditioned convolutional stack and return the audio tensor.

    The network input is the channel-wise concatenation of the processed
    conditioning inputs, one unit-amplitude sinusoid per harmonic of
    `conditioning['f0_hz']` and a single channel of Gaussian noise.

    Args:
        conditioning: Mapping that holds 'f0_hz' and every key listed in
            `self.input_keys`.

    Returns:
        A dict with the single key 'audio_tensor' holding the output of
        `self.dense_out` applied to the scaled sum of skip outputs.
    """
    batch_size = conditioning['f0_hz'].shape[0]
    noise = tf.random.normal([batch_size, self.n_total, 1])

    # Un-summed harmonic sinusoids driven by the resampled f0 curve.
    f0_hz = core.resample(conditioning['f0_hz'], self.n_total)
    harmonic_frequencies = core.get_harmonic_frequencies(
        f0_hz, self.n_harmonics)
    audios = core.oscillator_bank(
        frequency_envelopes=harmonic_frequencies,
        amplitude_envelopes=tf.ones_like(harmonic_frequencies),
        sample_rate=self.sample_rate,
        sum_sinusoids=False)

    # Pass each conditioning input through its stack, then resample all
    # inputs to the target sample rate.
    raw_inputs = [conditioning[key] for key in self.input_keys]
    stacked_inputs = [
        stack(value) for stack, value in zip(self.input_stacks, raw_inputs)
    ]
    resampled_inputs = [
        core.resample(value, self.n_total) for value in stacked_inputs
    ]
    c = tf.concat(resampled_inputs + [audios, noise], axis=-1)

    # Conv layers: each layer returns the next hidden state and a skip
    # output; the skip outputs are accumulated across layers.
    x = self.first_conv(c)
    skips = 0
    for layer in self.conv_layers:
        x, skip = layer(x, c)
        skips = skips + skip
    # Scale the accumulated skips by 1 / sqrt(number of layers).
    skips = skips * tf.sqrt(1.0 / len(self.conv_layers))

    return {'audio_tensor': self.dense_out(skips)}
def _default_processing(self, features):
    """Always resample to time_steps and scale f0 signal.

    Works on `features` in place: resamples the frame-wise inputs that are
    present, derives the sub-harmonic f0, fills in missing oscillator and
    phase signals from f0, and adds the scaled decoder inputs.

    Args:
        features: Mapping of feature name to value. Must contain "f0" and
            the key named by `self.f0_additive`.

    Returns:
        The same `features` mapping with the derived entries added.
    """
    # Make sure inputs have the right dimensions, i.e.
    # [batch_size, n_frames, {context dependent}].
    frame_wise_keys = (
        "f0", "phase", "phase_unwrapped", "osc", "osc_sub", "phase_sub",
        "phase_unwrapped_sub", "osc_sub_sync", "phase_unwrapped_sub_sync",
        "phase_sub_sync",
    )
    for key in frame_wise_keys:
        value = features.get(key)
        if value is None:
            continue
        features[key] = resample(at_least_3d(value),
                                 n_timesteps=self.time_steps)

    # Divide by denom (e.g. number of cylinders in engine to produce
    # subharmonics).
    features["f0_sub"] = features["f0"] / self.denom

    # Set additive input.
    features["f0_additive"] = features[self.f0_additive]

    # Generate osc and phase from f0 if missing.
    for suffix in ("", "_sub"):
        f0_key = "f0" + suffix
        osc_key = "osc" + suffix
        phase_key = "phase" + suffix

        if features.get(osc_key) is None:
            unit_amplitudes = tf.ones(tf.shape(features[f0_key]))
            wave = oscillator_bank(features[f0_key],
                                   unit_amplitudes,
                                   sample_rate=self.rate)
            features[osc_key] = wave[:, :, tf.newaxis]

        if features.get(phase_key) is None:
            # Per-step phase increment, accumulated along axis 1.
            omegas = 2.0 * np.pi * features[f0_key] / float(self.rate)
            unwrapped = tf.cumsum(omegas, axis=1)
            features["phase_unwrapped" + suffix] = unwrapped
            # Wrap the accumulated phase into [-pi, pi).
            wrapped = tf.math.mod(unwrapped + np.pi, 2 * np.pi) - np.pi
            features[phase_key] = wrapped

    # Missing "_sync" variants fall back to the plain sub-harmonic signals.
    for base_key in ("osc_sub", "phase_sub", "phase_unwrapped_sub"):
        sync_key = base_key + "_sync"
        if features.get(sync_key) is None:
            features[sync_key] = features[base_key]

    # Prepare decoder network inputs.
    features["f0_scaled"] = hz_to_midi(features["f0"]) / F0_RANGE
    features["f0_scaled_mel"] = hz_to_mel(features["f0"]) / F0_RANGE_MEL
    features["f0_sub_scaled"] = hz_to_mel(features["f0_sub"]) / F0_SUB_RANGE

    # Affine map that sends a phase of -pi..pi to 0..1.
    for key in ("phase", "phase_sub", "phase_sub_sync"):
        phase = features.get(key)
        if phase is not None:
            features[key + "_scaled"] = 0.5 + 0.5 * phase / np.pi

    # Affine map that sends an oscillator value of -1..1 to 0..1.
    for key in ("osc", "osc_sub", "osc_sub_sync"):
        osc = features.get(key)
        if osc is not None:
            features[key + "_scaled"] = 0.5 + 0.5 * osc

    return features
def _default_processing(self, features):
    """Always resample to time_steps and scale f0 signal.

    Works on `features` in place: "f0" is made at least 3-D, resampled to
    `self.time_steps` and divided by `self.denom`; the result is also stored
    as "f0_additive". The decoder input "f0_scaled" is then built according
    to `self.feature_domain`.

    Args:
        features: Mapping of feature name to value. Must contain "f0"; the
            optional "osc" entry is used by the "osc" feature domain.

    Returns:
        The same `features` mapping with "f0", "f0_additive" and
        "f0_scaled" set.

    Raises:
        ValueError: If `self.feature_domain` is not one of "freq",
            "freq-old", "time" or "osc".
    """
    features["f0"] = resample(at_least_3d(features["f0"]),
                              n_timesteps=self.time_steps)

    # Divide by denom (e.g. number of cylinders in engine to produce
    # subharmonics).
    features["f0"] /= self.denom
    f0 = features["f0"]

    # Set additive input.
    features["f0_additive"] = f0

    # Prepare decoder network inputs.
    domain = self.feature_domain
    if domain == "freq":
        features["f0_scaled"] = hz_to_midi(f0) / F0_RANGE
    elif domain == "freq-old":
        # DEPRECATED. This option is for backward compatibility with a
        # version containing a typo.
        features["f0_scaled"] = (
            hz_to_midi(self.denom * f0) / F0_RANGE / self.denom)
    elif domain == "time":
        unit_amplitudes = tf.ones(tf.shape(f0))
        wave = oscillator_bank(f0, unit_amplitudes, sample_rate=self.rate)
        features["f0_scaled"] = wave[:, :, tf.newaxis]
    elif domain == "osc":
        provided_osc = features.get("osc")
        if provided_osc is not None:
            features["f0_scaled"] = provided_osc[:, :, tf.newaxis]
        else:
            # No oscillator signal supplied: synthesize one at the
            # undivided frequency (denom * f0).
            unit_amplitudes = tf.ones(tf.shape(f0))
            wave = oscillator_bank(self.denom * f0,
                                   unit_amplitudes,
                                   sample_rate=self.rate)
            features["f0_scaled"] = wave[:, :, tf.newaxis]
    else:
        raise ValueError("%s is not a valid value for feature_domain." %
                         domain)

    return features
def test_silent_above_nyquist(self, sample_rate):
    """Tests that no frequencies above nyquist (sample_rate/2) are created.

    All three test frequencies lie above nyquist, so the output of
    `core.oscillator_bank` is expected to be all zeros.

    Args:
        sample_rate: Sample rate passed to the oscillator bank.
    """
    nyquist = sample_rate / 2
    frequencies = nyquist * np.array([1.1, 1.5, 2.0])
    amplitudes = np.ones_like(frequencies)

    # Broadcast to [batch_size, n_samples, 3] envelopes for the tf function.
    envelope_shape = [self.batch_size, self.n_samples, 3]
    frequency_envelopes = (
        np.ones(envelope_shape) * frequencies[np.newaxis, np.newaxis, :])
    amplitude_envelopes = (
        np.ones(envelope_shape) * amplitudes[np.newaxis, np.newaxis, :])

    wav_tf = core.oscillator_bank(frequency_envelopes,
                                  amplitude_envelopes,
                                  sample_rate=sample_rate)

    # The expected signal is silence with the same shape as the output.
    silence = np.zeros_like(wav_tf)
    self.assertAllClose(silence, wav_tf)
def test_oscillator_bank_shape_is_correct(self, sum_sinusoids):
    """Tests that sum_sinusoids reduces the last dimension.

    Args:
        sum_sinusoids: Flag forwarded to `core.oscillator_bank`; when true
            the output is expected to lose its trailing sinusoid axis.
    """
    frequencies = 400.0 * np.array([1.0, 1.5, 2.0])
    amplitudes = np.ones_like(frequencies)

    # Broadcast to [batch_size, n_samples, 3] envelopes for the tf function.
    envelope_shape = [self.batch_size, self.n_samples, 3]
    frequency_envelopes = (
        np.ones(envelope_shape) * frequencies[np.newaxis, np.newaxis, :])
    amplitude_envelopes = (
        np.ones(envelope_shape) * amplitudes[np.newaxis, np.newaxis, :])

    wav_tf = core.oscillator_bank(frequency_envelopes,
                                  amplitude_envelopes,
                                  sample_rate=self.sample_rate,
                                  sum_sinusoids=sum_sinusoids)

    expected_shape = (
        [self.batch_size, self.n_samples]
        if sum_sinusoids
        else [self.batch_size, self.n_samples, 3]
    )
    self.assertAllEqual(expected_shape, list(wav_tf.shape))
def get_signal(self, amplitudes, frequencies):
    """Synthesize audio with sinusoidal synthesizer from controls.

    Both controls are resampled to `self.n_samples` and handed to
    `core.oscillator_bank`.

    Args:
        amplitudes: Amplitude tensor of shape [batch, n_frames, n_sinusoids].
            Expects float32 that is strictly positive.
        frequencies: Tensor of shape [batch, n_frames, n_sinusoids]. Expects
            float32 in Hertz that is strictly positive.

    Returns:
        signal: A tensor of harmonic waves of shape [batch, n_samples].
    """
    # Create sample-wise envelopes. Only the amplitudes use the configured
    # resampling method; frequencies use the default of core.resample.
    sample_wise_amplitudes = core.resample(
        amplitudes, self.n_samples, method=self.amp_resample_method)
    sample_wise_frequencies = core.resample(frequencies, self.n_samples)

    return core.oscillator_bank(
        frequency_envelopes=sample_wise_frequencies,
        amplitude_envelopes=sample_wise_amplitudes,
        sample_rate=self.sample_rate)
def get_signal(self, gains, frequencies, dampings):
    """Synthesize exponentially decaying sinusoids from modal controls.

    Each sinusoid's amplitude envelope is `gains * exp(-dampings * t)` with
    `t` the sample time in seconds. The synthesized response is returned
    preceded by an equal-sized block of zeros along axis 1.

    Args:
        gains: Gains tensor of shape [batch, n_frames, n_sinusoids]. Expects
            float32 that is strictly positive.
        frequencies: Tensor of shape [batch, n_frames, n_sinusoids]. Expects
            float32 in Hertz that is strictly positive.
        dampings: Tensor of shape [batch, n_frames, n_sinusoids]. Expects
            float32 that is strictly positive.
            NOTE(review): these are broadcast against a time axis of shape
            [n_samples, 1], so the frame axis presumably has to be 1 or
            n_samples -- confirm against callers.

    Returns:
        signal: Zeros followed by the decaying modal response, concatenated
            along axis 1 (so twice the length of the oscillator output on
            that axis).
    """
    # Create sample-wise envelopes.
    sample_times = tf.range(self.n_samples) / self.sample_rate
    t = tf.expand_dims(tf.cast(sample_times, dtype=tf.float32), axis=1)

    decay = tf.exp(-dampings * t)
    amplitude_envelopes = gains * decay
    # Broadcast the frequencies to the shape of the amplitude envelopes.
    frequency_envelopes = frequencies * tf.ones_like(amplitude_envelopes)

    ir_half = core.oscillator_bank(
        frequency_envelopes=frequency_envelopes,
        amplitude_envelopes=amplitude_envelopes,
        sample_rate=self.sample_rate)

    leading_silence = tf.zeros_like(ir_half)
    return tf.concat((leading_silence, ir_half), axis=1)
def additive_synthesis(self,
                       amplitudes,
                       frequency_shifts=None,
                       frequency_distribution=None,
                       n_samples=64000,
                       sample_rate=16000,
                       amp_resample_method="window"):
    """Generate audio from a frame-wise monophonic oscillator bank.

    Args:
        amplitudes: Frame-wise oscillator peak amplitude. Shape
            [batch_size, n_frames, 1].
        frequency_shifts: Optional frequency variations, zero-centered. The
            total frequency of an oscillator is
            (frequencies * (1 + frequency_shifts)). Shape
            [batch_size, n_frames, n_frequencies].
        frequency_distribution: Optional per-oscillator amplitude
            variations, ranged zero to one. The total amplitude of an
            oscillator is (amplitudes * frequency_distribution). Shape
            [batch_size, n_frames, n_frequencies].
        n_samples: Total length of output audio. Envelopes are resampled to
            this length.
        sample_rate: Sample rate.
        amp_resample_method: Mode with which to resample amplitude envelopes.

    Returns:
        audio: Output of `core.oscillator_bank` for the sample-wise
            envelopes; presumably of shape [batch_size, n_samples] -- TODO
            confirm against `core.oscillator_bank`.
    """
    amplitudes = core.tf_float32(amplitudes)
    batch_size = amplitudes.shape[0]
    n_frames = amplitudes.shape[1]

    # Convert both optional controls up front so that each one is float32
    # no matter which of them ends up determining n_frequencies.
    if frequency_distribution is not None:
        frequency_distribution = core.tf_float32(frequency_distribution)
    if frequency_shifts is not None:
        frequency_shifts = core.tf_float32(frequency_shifts)

    # The number of oscillators follows the distribution when given, else
    # the shifts, else a single oscillator.
    if frequency_distribution is not None:
        n_frequencies = int(frequency_distribution.shape[-1])
    elif frequency_shifts is not None:
        n_frequencies = int(frequency_shifts.shape[-1])
    else:
        n_frequencies = 1

    # Create frequencies [batch_size, n_frames, n_frequencies].
    frequencies = self.get_linear_frequencies(batch_size, n_frames,
                                              n_frequencies)
    if frequency_shifts is not None:
        frequencies = frequencies * (1.0 + frequency_shifts)

    # Create oscillator amplitudes [batch_size, n_frames, n_frequencies].
    if frequency_distribution is not None:
        frequency_amplitudes = amplitudes * frequency_distribution
    else:
        frequency_amplitudes = amplitudes

    # Create sample-wise envelopes.
    frequency_envelopes = core.resample(frequencies, n_samples)  # cycles/sec
    amplitude_envelopes = core.resample(frequency_amplitudes,
                                        n_samples,
                                        method=amp_resample_method)

    # Synthesize from the oscillator bank.
    audio = core.oscillator_bank(frequency_envelopes,
                                 amplitude_envelopes,
                                 sample_rate=sample_rate)
    return audio