Exemplo n.º 1
0
    def test_oscillator_bank_is_accurate(self, batch_size,
                                         fundamental_frequency, n_harmonics,
                                         sample_rate, seconds):
        """Compare core.oscillator_bank output with a NumPy reference signal.

        Builds constant frequency and amplitude envelopes for a harmonic
        series, synthesizes them once with `create_wave_np` and once with
        `core.oscillator_bank`, and asserts that both results are close.

        Args:
          batch_size: Size of the batch to synthesize.
          fundamental_frequency: Base frequency of the oscillator in Hertz.
          n_harmonics: Number of harmonics to synthesize.
          sample_rate: Sample rate of synthesis in samples per second.
          seconds: Length of the generated test sample in seconds.
        """
        # Snap the requested duration to a whole number of samples.
        n_samples = int(sample_rate * seconds)
        seconds = float(n_samples) / sample_rate

        # Harmonic series with equal amplitudes that sum to one.
        harmonic_numbers = np.arange(1, n_harmonics + 1)
        frequencies = fundamental_frequency * harmonic_numbers
        amplitudes = 1.0 / n_harmonics * np.ones_like(frequencies)

        # Tile the per-harmonic values over [batch, sample, harmonic].
        template = np.ones([batch_size, n_samples, n_harmonics])
        frequency_envelopes = template * frequencies[np.newaxis, np.newaxis, :]
        amplitude_envelopes = template * amplitudes[np.newaxis, np.newaxis, :]

        # Reference signal from the NumPy helper.
        wav_np = create_wave_np(batch_size, frequency_envelopes,
                                amplitude_envelopes, seconds, n_samples)

        # Signal under test.
        wav_tf = core.oscillator_bank(frequency_envelopes,
                                      amplitude_envelopes,
                                      sample_rate=sample_rate)

        # Ignore edge effects.
        # NOTE(review): this slice is applied to the leading axis of both
        # arrays. If the outputs are laid out as [batch, n_samples], that is
        # the batch axis and the comparison may be empty for small batches --
        # confirm whether the time axis was intended.
        pad = 10
        trimmed = slice(pad, -pad)
        self.assertAllClose(wav_np[trimmed], wav_tf[trimmed])
Exemplo n.º 2
0
  def call(self, conditioning):
    """Build the conditioning tensor and run it through the conv stack.

    Args:
      conditioning: Mapping that holds 'f0_hz' and every key listed in
        `self.input_keys`.

    Returns:
      A dict with a single entry, 'audio_tensor', holding the output of
      `self.dense_out` applied to the scaled sum of the skip outputs.
    """
    f0_frames = conditioning['f0_hz']
    batch_size = f0_frames.shape[0]
    noise = tf.random.normal([batch_size, self.n_total, 1])

    # Unit-amplitude sinusoids at the harmonics of f0, kept as separate
    # channels (sum_sinusoids=False).
    f0_hz = core.resample(f0_frames, self.n_total)
    harmonic_freqs = core.get_harmonic_frequencies(f0_hz, self.n_harmonics)
    audios = core.oscillator_bank(
        frequency_envelopes=harmonic_freqs,
        amplitude_envelopes=tf.ones_like(harmonic_freqs),
        sample_rate=self.sample_rate,
        sum_sinusoids=False)

    # Run each selected feature through its own input stack, then resample
    # everything to n_total steps.
    selected = [conditioning[key] for key in self.input_keys]
    stacked = [
        stack(feature) for stack, feature in zip(self.input_stacks, selected)
    ]
    resampled = [core.resample(feature, self.n_total) for feature in stacked]

    c = tf.concat(resampled + [audios, noise], axis=-1)

    # Conv layers: each one receives the running activation plus the full
    # conditioning tensor and contributes one skip output.
    x = self.first_conv(c)
    skips = 0
    for layer in self.conv_layers:
      x, skip = layer(x, c)
      skips += skip
    scale = tf.sqrt(1.0 / len(self.conv_layers))
    skips *= scale

    return {'audio_tensor': self.dense_out(skips)}
Exemplo n.º 3
0
    def _default_processing(self, features):
        '''Resample inputs to time_steps and derive the scaled decoder inputs.

        Mutates and returns `features`. Missing oscillator and phase signals
        are generated from the corresponding f0 entry, and missing "_sync"
        entries fall back to their unsynchronised counterparts.

        Args:
            features: Mapping of feature name to tensor. Must contain "f0" and
                the key named by `self.f0_additive`.

        Returns:
            The same `features` mapping with the derived entries added.
        '''
        # Make sure inputs have the right dimensions, i.e.
        # [batch_size, n_frames, {context dependent}].
        frame_keys = (
            "f0", "phase", "phase_unwrapped", "osc", "osc_sub",
            "phase_sub", "phase_unwrapped_sub", "osc_sub_sync",
            "phase_unwrapped_sub_sync", "phase_sub_sync",
        )
        for key in frame_keys:
            value = features.get(key)
            if value is None:
                continue
            features[key] = resample(at_least_3d(value),
                                     n_timesteps=self.time_steps)

        # Divide by denom (e.g. number of cylinders in engine to produce
        # subharmonics).
        features["f0_sub"] = features["f0"] / self.denom

        # Set additive input.
        features["f0_additive"] = features[self.f0_additive]

        # Generate osc and phase from f0 if missing.
        for suffix in ("", "_sub"):
            f0_key = "f0" + suffix
            osc_key = "osc" + suffix
            phase_key = "phase" + suffix

            if features.get(osc_key) is None:
                unit_amplitudes = tf.ones(tf.shape(features[f0_key]))
                oscillation = oscillator_bank(features[f0_key],
                                              unit_amplitudes,
                                              sample_rate=self.rate)
                features[osc_key] = oscillation[:, :, tf.newaxis]

            if features.get(phase_key) is None:
                # Per-step phase increment, accumulated along axis 1.
                omegas = 2.0 * np.pi * features[f0_key] / float(self.rate)
                unwrapped = tf.cumsum(omegas, axis=1)
                features["phase_unwrapped" + suffix] = unwrapped
                # Wrap into [-pi, pi).
                features[phase_key] = tf.math.mod(unwrapped + np.pi,
                                                  2 * np.pi) - np.pi

        # Fall back to the unsynchronised signal when no synced one is given.
        for base in ("osc_sub", "phase_sub", "phase_unwrapped_sub"):
            sync_key = base + "_sync"
            if features.get(sync_key) is None:
                features[sync_key] = features[base]

        # Prepare decoder network inputs.
        features["f0_scaled"] = hz_to_midi(features["f0"]) / F0_RANGE
        features["f0_scaled_mel"] = hz_to_mel(features["f0"]) / F0_RANGE_MEL
        features["f0_sub_scaled"] = hz_to_mel(
            features["f0_sub"]) / F0_SUB_RANGE

        # Affine map x -> 0.5 + 0.5 * x / pi for the phase signals.
        for key in ("phase", "phase_sub", "phase_sub_sync"):
            phase = features.get(key)
            if phase is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * phase / np.pi

        # Affine map x -> 0.5 + 0.5 * x for the oscillator signals.
        for key in ("osc", "osc_sub", "osc_sub_sync"):
            osc = features.get(key)
            if osc is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * osc

        return features
Exemplo n.º 4
0
    def _default_processing(self, features):
        '''Resample f0 to time_steps and build the "f0_scaled" decoder input.

        Mutates and returns `features`. How "f0_scaled" is computed depends on
        `self.feature_domain` ("freq", "freq-old", "time" or "osc").

        Args:
            features: Mapping of feature name to tensor. Must contain "f0";
                "osc" is optional and only read in the "osc" domain.

        Returns:
            The same `features` mapping with "f0", "f0_additive" and
            "f0_scaled" set.

        Raises:
            ValueError: If `self.feature_domain` is not a supported value.
        '''
        f0 = resample(at_least_3d(features["f0"]),
                      n_timesteps=self.time_steps)

        # Divide by denom (e.g. number of cylinders in engine to produce
        # subharmonics).
        f0 /= self.denom
        features["f0"] = f0

        # Set additive input.
        features["f0_additive"] = f0

        # Prepare decoder network inputs.
        domain = self.feature_domain
        if domain == "freq":
            scaled = hz_to_midi(f0) / F0_RANGE
        elif domain == "freq-old":
            # DEPRECATED. This option is for backward compatibility with a
            # version containing a typo.
            scaled = hz_to_midi(self.denom * f0) / F0_RANGE / self.denom
        elif domain == "time":
            unit_amplitudes = tf.ones(tf.shape(f0))
            oscillation = oscillator_bank(f0,
                                          unit_amplitudes,
                                          sample_rate=self.rate)
            scaled = oscillation[:, :, tf.newaxis]
        elif domain == "osc":
            if features.get("osc") is None:
                # No oscillator supplied: synthesize one at denom * f0.
                unit_amplitudes = tf.ones(tf.shape(f0))
                oscillation = oscillator_bank(self.denom * f0,
                                              unit_amplitudes,
                                              sample_rate=self.rate)
                scaled = oscillation[:, :, tf.newaxis]
            else:
                scaled = features["osc"][:, :, tf.newaxis]
        else:
            raise ValueError("%s is not a valid value for feature_domain." %
                             domain)

        features["f0_scaled"] = scaled
        return features
Exemplo n.º 5
0
  def test_silent_above_nyquist(self, sample_rate):
    """Oscillators above Nyquist (sample_rate / 2) must produce silence."""
    nyquist = sample_rate / 2
    multipliers = np.array([1.1, 1.5, 2.0])
    frequencies = multipliers * nyquist
    amplitudes = np.ones_like(frequencies)

    # Tile the three oscillators over [batch, sample, oscillator].
    template = np.ones([self.batch_size, self.n_samples, 3])
    frequency_envelopes = template * frequencies[np.newaxis, np.newaxis, :]
    amplitude_envelopes = template * amplitudes[np.newaxis, np.newaxis, :]

    wav_tf = core.oscillator_bank(frequency_envelopes,
                                  amplitude_envelopes,
                                  sample_rate=sample_rate)

    # The expected output is all zeros, with the same shape as the result.
    silence = np.zeros_like(wav_tf)
    self.assertAllClose(silence, wav_tf)
Exemplo n.º 6
0
    def test_oscillator_bank_shape_is_correct(self, sum_sinusoids):
        """Check that sum_sinusoids controls whether the last axis is kept."""
        multipliers = np.array([1.0, 1.5, 2.0])
        frequencies = multipliers * 400.0
        amplitudes = np.ones_like(frequencies)

        # Tile the three oscillators over [batch, sample, oscillator].
        template = np.ones([self.batch_size, self.n_samples, 3])
        frequency_envelopes = template * frequencies[np.newaxis, np.newaxis, :]
        amplitude_envelopes = template * amplitudes[np.newaxis, np.newaxis, :]

        wav_tf = core.oscillator_bank(frequency_envelopes,
                                      amplitude_envelopes,
                                      sample_rate=self.sample_rate,
                                      sum_sinusoids=sum_sinusoids)

        # Summing collapses the oscillator axis; otherwise it is retained.
        expected_shape = [self.batch_size, self.n_samples]
        if not sum_sinusoids:
            expected_shape.append(3)
        self.assertAllEqual(expected_shape, list(wav_tf.shape))
Exemplo n.º 7
0
  def get_signal(self, amplitudes, frequencies):
    """Render audio from frame-wise sinusoid amplitudes and frequencies.

    Both controls are resampled to `self.n_samples` steps and then fed to
    `core.oscillator_bank`.

    Args:
      amplitudes: Amplitude tensor of shape [batch, n_frames, n_sinusoids].
        Expects float32 that is strictly positive.
      frequencies: Tensor of shape [batch, n_frames, n_sinusoids].
        Expects float32 in Hertz that is strictly positive.

    Returns:
      signal: The output of `core.oscillator_bank` for the resampled
        envelopes (presumably [batch, n_samples] -- confirm).
    """
    # Amplitudes use the configured resampling method; frequencies use the
    # default of core.resample.
    sample_amplitudes = core.resample(amplitudes, self.n_samples,
                                      method=self.amp_resample_method)
    sample_frequencies = core.resample(frequencies, self.n_samples)

    return core.oscillator_bank(frequency_envelopes=sample_frequencies,
                                amplitude_envelopes=sample_amplitudes,
                                sample_rate=self.sample_rate)
Exemplo n.º 8
0
  def get_signal(self, gains, frequencies, dampings):
    """Render exponentially decaying sinusoids, preceded by silence.

    Each sinusoid's amplitude is `gains * exp(-dampings * t)`, where `t` is
    time in seconds sampled at `self.sample_rate`. The oscillator output is
    then prefixed along axis 1 with zeros of the same shape.

    Args:
      gains: Gains tensor, documented as [batch, n_frames, n_sinusoids].
        Expects float32 that is strictly positive.
      frequencies: Tensor, documented as [batch, n_frames, n_sinusoids].
        Expects float32 in Hertz that is strictly positive.
      dampings: Tensor, documented as [batch, n_frames, n_sinusoids].
        Expects float32 that is strictly positive.

    NOTE(review): `t` has shape [n_samples, 1], so `gains` and `dampings`
    must broadcast against it along the frame axis -- confirm the intended
    frame layout with callers.

    Returns:
      signal: Zeros followed by the oscillator output, concatenated on
        axis 1, so that axis is twice as long as the oscillator output.
    """
    # Time axis in seconds, shaped [n_samples, 1] for broadcasting.
    sample_indices = tf.range(self.n_samples)
    seconds = tf.cast(sample_indices / self.sample_rate, dtype=tf.float32)
    t = tf.expand_dims(seconds, axis=1)

    # Create sample-wise envelopes.
    decay = tf.exp(-dampings * t)
    amplitude_envelopes = gains * decay
    frequency_envelopes = frequencies * tf.ones_like(amplitude_envelopes)

    ir_half = core.oscillator_bank(frequency_envelopes=frequency_envelopes,
                                   amplitude_envelopes=amplitude_envelopes,
                                   sample_rate=self.sample_rate)

    # Prefix the response with an equal-length block of silence.
    leading_silence = tf.zeros_like(ir_half)
    return tf.concat((leading_silence, ir_half), axis=1)
Exemplo n.º 9
0
    def additive_synthesis(self,
                           amplitudes,
                           frequency_shifts=None,
                           frequency_distribution=None,
                           n_samples=64000,
                           sample_rate=16000,
                           amp_resample_method="window"):
        '''Generate audio from a frame-wise oscillator bank.

        Base frequencies come from `self.get_linear_frequencies`; they are
        optionally scaled by `(1 + frequency_shifts)`, and the amplitudes are
        optionally weighted by `frequency_distribution`. Both envelopes are
        resampled to `n_samples` and passed to `core.oscillator_bank`.

        Args:
            amplitudes: Frame-wise oscillator peak amplitude. Shape [batch_size,
                n_frames, 1].
            frequency_shifts: Optional frequency variations, zero-centered.
                Total frequency of an oscillator is equal to (frequencies *
                (1 + frequency_shifts)). Shape [batch_size, n_frames,
                n_frequencies].
            frequency_distribution: Optional per-oscillator amplitude weights,
                ranged zero to one. Total amplitude of an oscillator is equal
                to (amplitudes * frequency_distribution). Shape [batch_size,
                n_frames, n_frequencies].
            n_samples: Total length of output audio. Envelopes are resampled
                to this length.
            sample_rate: Sample rate.
            amp_resample_method: Mode with which to resample amplitude
                envelopes.

        Returns:
            audio: Output of `core.oscillator_bank` for the resampled
                envelopes (presumably [batch_size, n_samples] -- confirm).
        '''
        amplitudes = core.tf_float32(amplitudes)
        batch_size = amplitudes.shape[0]
        n_frames = amplitudes.shape[1]

        # Cast each optional control independently so both are float32
        # whenever they are supplied.
        if frequency_distribution is not None:
            frequency_distribution = core.tf_float32(frequency_distribution)
        if frequency_shifts is not None:
            frequency_shifts = core.tf_float32(frequency_shifts)

        # The number of oscillators follows whichever control is provided,
        # preferring the distribution; with neither, use a single oscillator.
        if frequency_distribution is not None:
            n_frequencies = int(frequency_distribution.shape[-1])
        elif frequency_shifts is not None:
            n_frequencies = int(frequency_shifts.shape[-1])
        else:
            n_frequencies = 1

        # Create frequencies [batch_size, n_frames, n_frequencies].
        frequencies = self.get_linear_frequencies(batch_size, n_frames,
                                                  n_frequencies)
        if frequency_shifts is not None:
            frequencies *= (1.0 + frequency_shifts)

        # Create per-oscillator amplitudes [batch_size, n_frames, n_frequencies].
        if frequency_distribution is not None:
            frequency_amplitudes = amplitudes * frequency_distribution
        else:
            frequency_amplitudes = amplitudes

        # Create sample-wise envelopes.
        frequency_envelopes = core.resample(frequencies,
                                            n_samples)  # cycles/sec
        amplitude_envelopes = core.resample(frequency_amplitudes,
                                            n_samples,
                                            method=amp_resample_method)

        # Synthesize from the oscillator bank.
        audio = core.oscillator_bank(frequency_envelopes,
                                     amplitude_envelopes,
                                     sample_rate=sample_rate)
        return audio