Example #1
def resampled(chunksize_bytes=DEFAULT_CHUNK_SIZE,
              resample_to=SR44100(),
              store_resampled=False):
    """
    Create a basic processing pipeline that can resample all incoming audio
    to a normalized sampling rate for downstream processing, and store a
    convenient, compressed version for playback.

    :param chunksize_bytes: The number of bytes from the raw stream to process
        at once
    :param resample_to: The new, normalized sampling rate
    :param store_resampled: Whether to store the resampled audio samples
    :return: A simple processing pipeline
    """
    class Resampled(BaseModel):
        meta = JSONFeature(MetaData, store=True, encoder=AudioMetaDataEncoder)

        raw = ByteStreamFeature(ByteStream,
                                chunksize=chunksize_bytes,
                                needs=meta,
                                store=False)

        ogg = OggVorbisFeature(OggVorbis, needs=raw, store=True)

        pcm = AudioSamplesFeature(AudioStream, needs=raw, store=False)

        resampled = AudioSamplesFeature(Resampler,
                                        needs=pcm,
                                        samplerate=resample_to,
                                        store=store_resampled)

    return Resampled
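
The class returned here is used the same way as in Examples #2 and #10: decorate a subclass with storage settings, call process with encoded audio, then read features back by id. A minimal usage sketch, assuming the factory and helper names (resampled, simple_in_memory_settings, SineSynthesizer) are re-exported from the top-level zounds package:

import zounds

@zounds.simple_in_memory_settings
class Sound(zounds.resampled(resample_to=zounds.SR44100(),
                             store_resampled=True)):
    pass

# Synthesize one second of a 440 Hz tone and push it through the pipeline.
samples = zounds.SineSynthesizer(zounds.SR44100()).synthesize(
    zounds.Seconds(1), [440.])
_id = Sound.process(meta=samples.encode())

# Read the stored, resampled audio back by id.
snd = Sound(_id)
print(snd.resampled.samplerate)
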
Example #2
    def setUp(self):
        self.samplerate = SR44100()
        rs = resampled(resample_to=self.samplerate)

        wscheme = HalfLapped()

        @simple_in_memory_settings
        class Document(rs):
            windowed = ArrayWithUnitsFeature(
                SlidingWindow,
                wscheme=wscheme,
                wfunc=OggVorbisWindowingFunc(),
                needs=rs.resampled,
                store=False)

            fft = ArrayWithUnitsFeature(
                FFT,
                needs=windowed,
                store=False)

            centroid = ArrayWithUnitsFeature(
                SpectralCentroid,
                needs=fft,
                store=True)

        ss = SineSynthesizer(self.samplerate)
        chunks = \
            [ss.synthesize(Seconds(1), [440 * i]) for i in range(1, 6)]
        self.audio = \
            AudioSamples(ArrayWithUnits.concat(chunks), self.samplerate)
        _id = Document.process(meta=self.audio.encode())
        self.doc = Document(_id)
Example #3
def frequency_adaptive(long_window_sample_rate,
                       scale,
                       store_freq_adaptive=False,
                       check_scale_overlap_ratio=False,
                       chunksize_bytes=DEFAULT_CHUNK_SIZE,
                       resample_to=SR44100(),
                       store_resampled=False):
    BaseModel = resampled(chunksize_bytes, resample_to, store_resampled)

    class FrequencyAdaptive(BaseModel):
        long_windowed = ArrayWithUnitsFeature(SlidingWindow,
                                              wscheme=long_window_sample_rate,
                                              wfunc=OggVorbisWindowingFunc(),
                                              needs=BaseModel.resampled,
                                              store=False)

        long_fft = ArrayWithUnitsFeature(FFT, needs=long_windowed, store=False)

        freq_adaptive = FrequencyAdaptiveFeature(
            FrequencyAdaptiveTransform,
            transform=np.fft.irfft,
            scale=scale,
            check_scale_overlap_ratio=check_scale_overlap_ratio,
            window_func=np.hanning,
            needs=long_fft,
            store=store_freq_adaptive)

    return FrequencyAdaptive
Example #4
 def test_can_repr(self):
     cs = ChunkSizeBytes(SR44100(), Seconds(30), channels=2, bit_depth=16)
     s = cs.__repr__()
     self.assertEqual(
         'ChunkSizeBytes(samplerate=SR44100(f=2.2675736e-05, '
         'd=2.2675736e-05), duration=30 seconds, channels=2, bit_depth=16)',
         s)
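
The f and d values in the expected repr are both one sample period at 44.1 kHz, expressed in seconds:

# Frequency and duration of a single sample at 44.1 kHz.
print(1 / 44100)  # ~2.2675737e-05 seconds
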
Example #5
 def test_matches_fftfreq(self):
     samplerate = SR44100()
     n_bands = 2048
     fft_freqs = np.fft.rfftfreq(n_bands, 1 / int(samplerate))
     bands = LinearScale.from_sample_rate(samplerate, n_bands // 2)
     linear_freqs = np.array([b.start_hz for b in bands])
     np.testing.assert_allclose(linear_freqs, fft_freqs[:-1])
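
The final element is dropped because np.fft.rfftfreq returns n // 2 + 1 bin frequencies, from 0 Hz up to and including the Nyquist bin, while the scale built from n_bands // 2 bands contributes only 1024 start frequencies. A quick check of the bin layout:

import numpy as np

# 2048-point real FFT at 44.1 kHz: 1025 bins spaced 44100 / 2048 Hz apart,
# ending at the 22050 Hz Nyquist bin that the test slices away.
freqs = np.fft.rfftfreq(2048, 1 / 44100)
print(len(freqs), freqs[1], freqs[-1])  # 1025 21.533203125 22050.0
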
Example #6
def audio_graph(chunksize_bytes=DEFAULT_CHUNK_SIZE,
                resample_to=SR44100(),
                store_fft=False):
    """
    Produce a base class suitable as a starting point for many audio processing
    pipelines.  This class resamples all audio to a common sampling rate, and
    produces a Bark band spectrogram from overlapping short-time Fourier
    transform frames.  It also compresses the audio into Ogg Vorbis format for
    compact storage.
    """

    band = FrequencyBand(20, resample_to.nyquist)

    class AudioGraph(BaseModel):
        meta = JSONFeature(MetaData, store=True, encoder=AudioMetaDataEncoder)

        raw = ByteStreamFeature(ByteStream,
                                chunksize=chunksize_bytes,
                                needs=meta,
                                store=False)

        ogg = OggVorbisFeature(OggVorbis, needs=raw, store=True)

        pcm = AudioSamplesFeature(AudioStream, needs=raw, store=False)

        resampled = AudioSamplesFeature(Resampler,
                                        needs=pcm,
                                        samplerate=resample_to,
                                        store=False)

        windowed = ArrayWithUnitsFeature(SlidingWindow,
                                         needs=resampled,
                                         wscheme=HalfLapped(),
                                         wfunc=OggVorbisWindowingFunc(),
                                         store=False)

        dct = ArrayWithUnitsFeature(DCT, needs=windowed, store=True)

        fft = ArrayWithUnitsFeature(FFT, needs=windowed, store=store_fft)

        bark = ArrayWithUnitsFeature(BarkBands,
                                     needs=fft,
                                     frequency_band=band,
                                     store=True)

        centroid = ArrayWithUnitsFeature(SpectralCentroid,
                                         needs=bark,
                                         store=True)

        chroma = ArrayWithUnitsFeature(Chroma,
                                       needs=fft,
                                       frequency_band=band,
                                       store=True)

        bfcc = ArrayWithUnitsFeature(BFCC, needs=fft, store=True)

    return AudioGraph
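
Registration and processing follow the same pattern as the sketch after Example #1; what changes is which features are stored. A brief sketch, again assuming the top-level zounds names, of reading the stored spectral features back as ArrayWithUnits instances:

import zounds

@zounds.simple_in_memory_settings
class AudioDocument(zounds.audio_graph(store_fft=True)):
    pass

signal = zounds.NoiseSynthesizer(zounds.SR44100()).synthesize(zounds.Seconds(2))
_id = AudioDocument.process(meta=signal.encode())
doc = AudioDocument(_id)

# bark, chroma, bfcc and centroid are stored by the graph above; fft is stored
# here only because store_fft=True was passed to the factory.
print(doc.bark.dimensions, doc.chroma.shape)
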
Example #7
 def test_has_correct_sample_rate(self):
     half_lapped = HalfLapped()
     synth = DCTSynthesizer()
     raw = np.zeros((100, 2048))
     band = FrequencyBand(0, SR44100().nyquist)
     scale = LinearScale(band, raw.shape[1])
     timeseries = ArrayWithUnits(
         raw, [TimeDimension(*half_lapped), FrequencyDimension(scale)])
     output = synth.synthesize(timeseries)
     self.assertIsInstance(output.samplerate, SR44100)
     self.assertIsInstance(output, AudioSamples)
Example #8
    def test_can_do_multithreaded_resampling(self):
        synth = SilenceSynthesizer(SR44100())
        audio = [synth.synthesize(Seconds(5)) for _ in range(10)]
        pool = ThreadPool(4)

        def x(samples):
            rs = Resample(int(SR44100()), int(SR11025()))
            return rs(samples, end_of_input=True)

        resampled = pool.map(x, audio)
        self.assertEqual(10, len(resampled))
Example #9
 def test_can_get_all_even_sized_bands(self):
     samplerate = SR44100()
     scale = LinearScale.from_sample_rate(samplerate,
                                          44100,
                                          always_even=True)
     log_scale = GeometricScale(20, 20000, 0.01, 64)
     slices = [scale.get_slice(band) for band in log_scale]
     sizes = [s.stop - s.start for s in slices]
     self.assertTrue(
         not any([s % 2 for s in sizes]),
         'All slice sizes should be even but were {sizes}'.format(
             **locals()))
Example #10
    def setUp(self):
        @simple_in_memory_settings
        class Document(stft(store_fft=True)):
            pass

        synth = NoiseSynthesizer(SR44100())
        audio = synth.synthesize(Seconds(2))

        _id = Document.process(meta=audio.encode())
        doc = Document(_id)

        non_doc = SomethingElse(11)

        parser = FeatureParser(Document, locals())

        self.document = Document
        self.doc = doc
        self.parser = parser
Example #11
def windowed(wscheme,
             chunksize_bytes=DEFAULT_CHUNK_SIZE,
             resample_to=SR44100(),
             store_resampled=True,
             store_windowed=False,
             wfunc=None):

    rs = resampled(chunksize_bytes=chunksize_bytes,
                   resample_to=resample_to,
                   store_resampled=store_resampled)

    class Sound(rs):
        windowed = ArrayWithUnitsFeature(SlidingWindow,
                                         wscheme=wscheme,
                                         wfunc=wfunc,
                                         needs=rs.resampled,
                                         store=store_windowed)

    return Sound
Example #12
    def test_can_encode_and_decode_variable_rate_time_Series(self):

        class TimestampEmitter(ff.Node):
            def __init__(self, needs=None):
                super(TimestampEmitter, self).__init__(needs=needs)
                self.pos = Picoseconds(0)

            def _process(self, data):
                td = data.dimensions[0]
                frequency = td.frequency
                timestamps = [self.pos + (i * frequency)
                        for i, d in enumerate(data)
                        if random() > 0.9]
                slices = TimeSlice.slices(timestamps)
                yield VariableRateTimeSeries(
                    (ts, np.zeros(0)) for ts in slices)
                self.pos += frequency * len(data)

        graph = stft(store_fft=True)

        @simple_in_memory_settings
        class Document(graph):
            slices = TimeSliceFeature(
                    TimestampEmitter,
                    needs=graph.fft,
                    store=True)

            pooled = VariableRateTimeSeriesFeature(
                    Pooled,
                    op=np.max,
                    axis=0,
                    needs=(slices, graph.fft),
                    store=False)

        signal = NoiseSynthesizer(SR44100())\
            .synthesize(Seconds(10))\
            .encode()
        _id = Document.process(meta=signal)
        doc = Document(_id)
        self.assertIsInstance(doc.pooled, VariableRateTimeSeries)
        self.assertEqual(doc.fft.shape[1], doc.pooled.slicedata.shape[1])
Example #13
def stft(chunksize_bytes=DEFAULT_CHUNK_SIZE,
         resample_to=SR44100(),
         wscheme=HalfLapped(),
         store_fft=False,
         fft_padding_samples=None,
         store_windowed=False,
         store_resampled=False):
    class ShortTimeFourierTransform(BaseModel):
        meta = JSONFeature(MetaData, store=True, encoder=AudioMetaDataEncoder)

        raw = ByteStreamFeature(ByteStream,
                                chunksize=chunksize_bytes,
                                needs=meta,
                                store=False)

        ogg = OggVorbisFeature(OggVorbis, needs=raw, store=True)

        pcm = AudioSamplesFeature(AudioStream, needs=raw, store=False)

        resampled = AudioSamplesFeature(Resampler,
                                        needs=pcm,
                                        samplerate=resample_to,
                                        store=store_resampled)

        windowed = ArrayWithUnitsFeature(SlidingWindow,
                                         needs=resampled,
                                         wscheme=wscheme,
                                         wfunc=OggVorbisWindowingFunc(),
                                         store=store_windowed)

        fft = ArrayWithUnitsFeature(FFT,
                                    padding_samples=fft_padding_samples,
                                    needs=windowed,
                                    store=store_fft)

    return ShortTimeFourierTransform
Example #14
 def __init__(self, path):
     super(MusicNet, self).__init__()
     self.path = path
     self._metadata = \
         'https://homes.cs.washington.edu/~thickstn/media/musicnet_metadata.csv'
     self._samplerate = SR44100()
Example #15
import numpy as np
from featureflow import BaseModel, JSONFeature, ByteStream, ByteStreamFeature
from zounds.soundfile import \
    MetaData, AudioMetaDataEncoder, OggVorbis, OggVorbisFeature, AudioStream, \
    Resampler, ChunkSizeBytes
from zounds.segment import \
    ComplexDomain, MovingAveragePeakPicker, TimeSliceFeature
from zounds.persistence import ArrayWithUnitsFeature, AudioSamplesFeature, \
    FrequencyAdaptiveFeature
from zounds.timeseries import SR44100, HalfLapped, Stride, Seconds
from zounds.spectral import \
    SlidingWindow, OggVorbisWindowingFunc, FFT, BarkBands, SpectralCentroid, \
    Chroma, BFCC, DCT, FrequencyAdaptiveTransform, FrequencyBand

DEFAULT_CHUNK_SIZE = ChunkSizeBytes(samplerate=SR44100(),
                                    duration=Seconds(30),
                                    bit_depth=16,
                                    channels=2)


def resampled(chunksize_bytes=DEFAULT_CHUNK_SIZE,
              resample_to=SR44100(),
              store_resampled=False):
    """
    Create a basic processing pipeline that can resample all incoming audio
    to a normalized sampling rate for downstream processing, and store a
    convenient, compressed version for playback

    :param chunksize_bytes: The number of bytes from the raw stream to process
    at once
    :param resample_to: The new, normalized sampling rate
Example #16
 def __init__(self, samplerate=None, needs=None):
     super(Resampler, self).__init__(needs=needs)
     self._samplerate = samplerate or SR44100()
     self._resample = None
Example #17
 def test_can_invert_fft_44100(self):
     self.can_invert_fft(SR44100())
Example #18
 def test_correct_output_with_stereo(self):
     synth = SilenceSynthesizer(SR44100())
     samples = synth.synthesize(Seconds(1)).stereo
     rs = Resample(int(samples.samplerate), int(SR11025()), nchannels=2)
     resampled = rs(samples, end_of_input=True)
     self.assertEqual((11025, 2), resampled.shape)
Example #19
 def test_audible_range_lower_bound(self):
     band = FrequencyBand.audible_range(SR44100())
     self.assertEqual(20, band.start_hz)
Example #20
 def test_can_convert_to_integer_number_of_bytes(self):
     cs = ChunkSizeBytes(SR44100(), Seconds(30), channels=2, bit_depth=16)
     self.assertEqual(5292000, int(cs))
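
The expected value is just the size of 30 seconds of uncompressed 16-bit stereo PCM at 44.1 kHz:

# samples per second * seconds * channels * bytes per sample
print(44100 * 30 * 2 * (16 // 8))  # 5292000
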
Example #21
 def test_correct_window_and_step_size_at_44100(self):
     self._check(SR44100(), 2048, 1024)
Example #22
 def test_audible_range_upper_bound(self):
     sr = SR44100()
     band = FrequencyBand.audible_range(sr)
     self.assertEqual(int(sr) // 2, band.stop_hz)
Example #23
def soundfile(flo=None):
    synth = NoiseSynthesizer(SR44100())
    samples = synth.synthesize(Seconds(5)).stereo
    flo = samples.encode(flo=flo)
    return samples, flo
Example #24
from soundfile import SoundFile

from zounds.timeseries import TimeSlice, AudioSamples, SR44100, HalfLapped, \
    Seconds, Milliseconds, Stride
from zounds.persistence import ArrayWithUnitsFeature, AudioSamplesFeature
from zounds.soundfile import \
    AudioStream, OggVorbis, OggVorbisFeature, Resampler
from zounds.spectral import \
    SlidingWindow, OggVorbisWindowingFunc, FFT, Chroma, BarkBands, BFCC, \
    FrequencyBand
from zounds.basic import Max
from zounds.util import simple_in_memory_settings
from featureflow import *

windowing_scheme = HalfLapped()
samplerate = SR44100()
band = FrequencyBand(20, samplerate.nyquist)


@simple_in_memory_settings
class Document(BaseModel):
    raw = ByteStreamFeature(
        ByteStream,
        chunksize=2 * 44100 * 30 * 2,  # 30 seconds of 16-bit stereo at 44.1 kHz
        store=True)

    ogg = OggVorbisFeature(
        OggVorbis,
        needs=raw,
        store=True)
Example #25
 def test_generates_correct_samplerate(self):
     ss = SineSynthesizer(SR44100())
     audio = ss.synthesize(Seconds(4), freqs_in_hz=[440.])
     self.assertEqual(SR44100(), audio.samplerate)
Example #26
 def setUp(self):
     self.samplerate = SR44100()
     self.wscheme = HalfLapped()
     self.STFT = stft(store_fft=True,
                      resample_to=self.samplerate,
                      wscheme=self.wscheme)
Example #27
 def x(samples):
     rs = Resample(int(SR44100()), int(SR11025()))
     return rs(samples, end_of_input=True)