Example #1
0
"""
Use a triplet-loss to learn a similarity metric between short spectrograms

UNSUPERVISED LEARNING OF SEMANTIC AUDIO REPRESENTATIONS
https://arxiv.org/pdf/1711.02209.pdf
"""

import numpy as np
import zounds
from zounds.spectral import apply_scale

samplerate = zounds.SR11025()
BaseModel = zounds.resampled(resample_to=samplerate, store_resampled=True)

scale_bands = 96
spectrogram_duration = 64  # frames per spectrogram

# Frames [duration, 2*duration) — presumably the anchor segment of a longer
# (3x-duration) window used for triplet sampling; confirm against the
# sampling code.
anchor_slice = slice(spectrogram_duration, spectrogram_duration * 2)

# Geometric (log-spaced) frequency scale spanning 50 Hz up to Nyquist.
scale = zounds.GeometricScale(
    start_center_hz=50,
    stop_center_hz=samplerate.nyquist,
    bandwidth_ratio=0.115,
    n_bands=scale_bands)
scale.ensure_overlap_ratio()

windowing_scheme = zounds.HalfLapped()
spectrogram_sample_rate = zounds.SampleRate(
    frequency=windowing_scheme.frequency * (spectrogram_duration // 2),
Example #2
0
import zounds

# Base processing-graph class whose audio is resampled to 11.025 kHz.
Resampled = zounds.resampled(resample_to=zounds.SR11025())


@zounds.simple_in_memory_settings
class Sound(Resampled):
    """
    Feature pipeline: slide a half-lapped, Ogg-Vorbis-windowed frame over
    the resampled audio, take the MDCT of each frame, then apply perceptual
    (A-weighting) emphasis.  Feature data is "persisted" to an in-memory
    store.
    """

    # Overlapping analysis frames cut from the resampled signal.
    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    # Modified discrete cosine transform of each frame.
    mdct = zounds.ArrayWithUnitsFeature(zounds.MDCT, needs=windowed)

    # Perceptually (A-)weighted MDCT coefficients.
    weighted = zounds.ArrayWithUnitsFeature(
        lambda x: x * zounds.AWeighting(), needs=mdct)

if __name__ == '__main__':

    # produce some audio to test our pipeline, and encode it as FLAC
    synth = zounds.SineSynthesizer(zounds.SR44100())
Example #3
0
import zounds

# Base processing-graph class whose audio is resampled to 11.025 kHz.
Resampled = zounds.resampled(resample_to=zounds.SR11025())


@zounds.simple_in_memory_settings
class Sound(Resampled):
    """
    Pipeline over resampled audio: half-lapped Ogg-Vorbis windowing, MDCT
    per frame, and perceptual A-weighting; feature data is "persisted" to
    an in-memory store.
    """

    # Overlapping, windowed frames of the resampled signal.
    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    # MDCT coefficients for each frame.
    mdct = zounds.ArrayWithUnitsFeature(
        zounds.MDCT,
        needs=windowed)

    # A-weighted (perceptually emphasized) MDCT coefficients.
    weighted = zounds.ArrayWithUnitsFeature(
        lambda coeffs: coeffs * zounds.AWeighting(),
        needs=mdct)


if __name__ == '__main__':

    # produce some audio to test our pipeline, and encode it as FLAC
    synth = zounds.SineSynthesizer(zounds.SR44100())
    samples = synth.synthesize(zounds.Seconds(5), [220., 440., 880.])
    encoded = samples.encode(fmt='FLAC')
import argparse
from random import choice

import featureflow as ff
import numpy as np
from torch import nn
from torch.nn import functional as F
from torch.optim import Adam

import zounds
from zounds.learn import Conv1d, ConvTranspose1d, to_var, from_var
from zounds.timeseries import categorical, inverse_categorical

samplerate = zounds.SR11025()
BaseModel = zounds.resampled(resample_to=samplerate, store_resampled=True)

# 8192-sample analysis windows with a half-window (50% overlap) hop.
window_size = 8192
wscheme = zounds.SampleRate(frequency=samplerate.frequency *
                            (window_size // 2),
                            duration=samplerate.frequency * window_size)


@zounds.simple_lmdb_settings('ae', map_size=1e10, user_supplied_id=True)
class Sound(BaseModel):
    """
    LMDB-backed ('ae') pipeline: windows the resampled audio and derives
    mu-law and categorical encodings of each window.
    """

    # Overlapping windows of the resampled audio, framed by wscheme.
    windowed = zounds.ArrayWithUnitsFeature(zounds.SlidingWindow,
                                            wscheme=wscheme,
                                            needs=BaseModel.resampled)

    # Mu-law companded view of each window.
    mu_law = zounds.ArrayWithUnitsFeature(zounds.mu_law, needs=windowed)

    # Categorical (discretized) encoding of each window; see
    # zounds.timeseries.categorical for the exact representation.
    categorical = zounds.ArrayWithUnitsFeature(categorical, needs=windowed)
Example #5
0
import numpy as np
import zounds
from zounds.spectral import apply_scale

samplerate = zounds.SR11025()
BaseModel = zounds.resampled(resample_to=samplerate)

scale_bands = 96
spectrogram_duration = 64  # frames per spectrogram

# Frames [duration, 2*duration) — presumably the anchor segment of a longer
# (3x-duration) window used for triplet sampling; confirm against the
# sampling code.
anchor_slice = slice(spectrogram_duration, spectrogram_duration * 2)

# Geometric (log-spaced) frequency scale spanning 50 Hz up to Nyquist.
scale = zounds.GeometricScale(
    start_center_hz=50,
    stop_center_hz=samplerate.nyquist,
    bandwidth_ratio=0.115,
    n_bands=scale_bands)
scale.ensure_overlap_ratio()

windowing_scheme = zounds.HalfLapped()
# One spectrogram every spectrogram_duration // 2 frames (50% overlap),
# each spanning spectrogram_duration frames.
spectrogram_sample_rate = zounds.SampleRate(
    frequency=windowing_scheme.frequency * (spectrogram_duration // 2),
    duration=windowing_scheme.frequency * spectrogram_duration)


def spectrogram(x):
    x = apply_scale(
        np.abs(x.real), scale, window=zounds.OggVorbisWindowingFunc())
    x = zounds.log_modulus(x * 100)