def test_can_apply_weighting_to_filter_bank(self):
    """A filter bank multiplied by ``AWeighting`` keeps its dimensions."""
    samplerate = SR11025()
    mel_scale = MelScale(FrequencyBand(20, samplerate.nyquist), 100)
    taps = np.hanning(25)
    bank = fir_filter_bank(mel_scale, 256, samplerate, taps)

    weighted = bank * AWeighting()

    self.assertSequenceEqual(bank.dimensions, weighted.dimensions)
def test_square_form_no_overlap_add(self):
    """Check the shape of ``square(30)`` on a frequency-adaptive feature.

    A document is built from tick audio with a sliding window, a DCT and a
    frequency-adaptive transform over a geometric scale; the squared form
    must be three-dimensional with trailing sizes 30 and 25.
    """
    samplerate = SR11025()
    BaseModel = stft(resample_to=samplerate)
    windowing_func = OggVorbisWindowingFunc()
    scale = GeometricScale(20, 5000, 0.1, 25)
    window_scheme = SampleRate(
        frequency=Milliseconds(500),
        duration=Seconds(1))

    @simple_in_memory_settings
    class Document(BaseModel):
        long_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=window_scheme,
            wfunc=windowing_func,
            needs=BaseModel.resampled,
            store=True)

        dct = ArrayWithUnitsFeature(
            DCT,
            scale_always_even=True,
            needs=long_windowed,
            store=True)

        mdct = FrequencyAdaptiveFeature(
            FrequencyAdaptiveTransform,
            transform=scipy.fftpack.idct,
            scale=scale,
            needs=dct,
            store=True)

    # The input is synthesized at SR22050 while the model resamples to
    # the SR11025 rate chosen above.
    ticks = TickSynthesizer(SR22050()).synthesize(
        Seconds(5), Milliseconds(200))
    doc = Document(Document.process(meta=ticks.encode()))

    square = doc.mdct.square(30)

    self.assertEqual(3, square.ndim)
    self.assertEqual(30, square.shape[1])
    self.assertEqual(25, square.shape[2])
def _model(self, slice_size, settings):
    """Build and return a model class used by the index tests.

    The returned class derives from an STFT base (resampled to SR11025,
    with the FFT stored) and from ``settings``. It declares three
    features: ``binary`` (``data >= 0``, not stored), ``sliced`` (the first
    ``slice_size`` entries of ``binary``) and ``packed`` (the output of the
    local ``pack`` function applied to ``sliced``).
    """
    base = stft(resample_to=SR11025(), store_fft=True)

    def pack(x):
        # Produces an all-zero uint64 array with one row per item of
        # ``x`` and 16 columns, keeping ``x``'s first dimension.
        codes = np.zeros((len(x), 16), dtype=np.uint64)
        dims = [x.dimensions[0], IdentityDimension()]
        return ArrayWithUnits(codes, dims)

    class Model(base, settings):
        binary = ArrayWithUnitsFeature(
            Binarize,
            predicate=lambda data: data >= 0,
            needs=base.fft,
            store=False)

        sliced = ArrayWithUnitsFeature(
            Slice,
            sl=slice(0, slice_size),
            needs=binary,
            store=True)

        packed = ArrayWithUnitsFeature(
            pack,
            needs=sliced,
            store=True)

    return Model
def test_can_invert_long_fft(self):
    """``FFTSynthesizer`` rebuilds audio from a stored long-window FFT.

    The reconstruction must be an ``AudioSamples`` instance whose
    dimensions equal those of the synthesized input.
    """
    samplerate = SR11025()
    rs = resampled(resample_to=samplerate)
    window_scheme = SampleRate(Milliseconds(500), Seconds(1))

    @simple_in_memory_settings
    class Document(rs):
        long_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=window_scheme,
            wfunc=OggVorbisWindowingFunc(),
            needs=rs.resampled,
            store=True)

        long_fft = ArrayWithUnitsFeature(
            FFT,
            needs=long_windowed,
            store=True)

    audio = SineSynthesizer(samplerate).synthesize(
        Seconds(2), freqs_in_hz=[440., 880.])
    doc = Document(Document.process(meta=audio.encode()))

    recon = FFTSynthesizer().synthesize(doc.long_fft)

    self.assertIsInstance(recon, AudioSamples)
    self.assertEqual(audio.dimensions, recon.dimensions)
def test_sliding_window_has_correct_dimensions(self):
    """Windows of 8192 sample periods give a second axis of length 8192."""
    raw = np.random.randint(0, 255, (11025 * 2)).astype(np.int64)
    samplerate = SR11025()
    samples = ArrayWithUnits(raw, [TimeDimension(*samplerate)])

    window = TimeSlice(duration=samplerate.frequency * 8192)
    step = TimeSlice(duration=samplerate.frequency * 4096)

    # The leftovers are not inspected by this test.
    _, windowed = samples.sliding_window_with_leftovers(window, step)

    self.assertEqual(8192, windowed.shape[1])
def test_can_apply_empty_time_slice_to_wrapper(self):
    """Indexing ``OggVorbisWrapper`` with ``TimeSlice()`` gives ten seconds.

    Ten seconds of sine audio are encoded as OGG/VORBIS; the ``end`` of the
    samples read back must equal ten seconds to six decimal places.
    """
    original = SineSynthesizer(SR11025()).synthesize(Seconds(10))
    encoded = original.encode(fmt='OGG', subtype='VORBIS')

    decoded = OggVorbisWrapper(encoded)[TimeSlice()]

    # Dividing by one second turns both durations into plain ratios.
    expected = Seconds(10) / Seconds(1)
    actual = decoded.end / Seconds(1)
    self.assertAlmostEqual(expected, actual, places=6)
def test_can_round_trip_audio_samples(self):
    """``AudioSamples`` survive ``self._roundtrip``.

    The decoded value must be an ``ArrayWithUnits`` with exactly one
    dimension, a ``TimeDimension``, and values close to the raw input.
    """
    raw = np.random.random_sample(11025 * 10)
    decoded = self._roundtrip(AudioSamples(raw, SR11025()))

    self.assertIsInstance(decoded, ArrayWithUnits)
    dims = decoded.dimensions
    self.assertEqual(1, len(dims))
    self.assertIsInstance(dims[0], TimeDimension)
    np.testing.assert_allclose(decoded, raw)
def test_correctly_infers_code_size_16(self):
    """With ``slice_size=128`` the index's hamming db has code size 16."""
    settings = self._settings_with_event_log()
    Model = self._model(slice_size=128, settings=settings)
    index = self._index(Model, Model.sliced)

    synth = SineSynthesizer(SR11025())
    signal = synth.synthesize(Seconds(5), [220, 440, 880])
    Model.process(meta=signal.encode())
    index._synchronously_process_events()

    self.assertEqual(16, index.hamming_db.code_size)
def test_can_convert_to_categorical_distribution(self):
    """``categorical`` appends an axis of size ``mu + 1`` that sums to 1."""
    mu = 255
    samplerate = SR11025()
    samples = SineSynthesizer(samplerate).synthesize(
        Seconds(4), [220, 440, 880])

    window = TimeSlice(duration=samplerate.frequency * 512)
    step = TimeSlice(duration=samplerate.frequency * 256)
    _, windowed = samples.sliding_window_with_leftovers(window, step)

    c = categorical(windowed, mu=mu)

    expected_shape = windowed.shape + (mu + 1,)
    self.assertEqual(expected_shape, c.shape)
    np.testing.assert_allclose(c.sum(axis=-1), 1)
def test_listen_raises_if_model_class_has_no_event_log_configured(self):
    """Processing index events without an event log raises ``ValueError``."""
    settings = self._settings_with_no_event_log()
    Model = self._model(slice_size=64, settings=settings)
    index = self._index(Model, Model.sliced)

    synth = SineSynthesizer(SR11025())
    signal = synth.synthesize(Seconds(5), [220, 440, 880])
    Model.process(meta=signal.encode())

    with self.assertRaises(ValueError):
        index._synchronously_process_events()
def correctly_infers_index_name(self):
    """The hamming db path contains ``'index.sliced'``.

    NOTE(review): this method's name lacks the ``test_`` prefix, so
    unittest's default discovery will not collect it. Confirm whether
    that is intentional before renaming.
    """
    settings = self._settings_with_event_log()
    Model = self._model(slice_size=128, settings=settings)
    index = self._index(Model, Model.sliced)

    synth = SineSynthesizer(SR11025())
    signal = synth.synthesize(Seconds(5), [220, 440, 880])
    Model.process(meta=signal.encode())
    index._synchronously_process_events()

    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn('index.sliced', index.hamming_db.path)
def test_can_add_already_packed_feature(self):
    """Adding a document by id indexes as many items as ``packed`` holds."""
    settings = self._settings_with_no_event_log()
    Model = self._model(slice_size=128, settings=settings)
    index = self._index(Model, Model.packed)

    synth = SineSynthesizer(SR11025())
    signal = synth.synthesize(Seconds(5), [220, 440, 880])
    _id = Model.process(meta=signal.encode())
    model = Model(_id)

    # No event log is configured here, so the document is added directly.
    index.add(_id)

    self.assertEqual(len(model.packed), len(index))
def test_can_rotate_90_degrees(self):
    """``np.rot90`` on a 2-d ``ArrayWithUnits`` swaps shape and dimensions."""
    samplerate = SR11025()
    half_lapped = samplerate.half_lapped()
    scale = GeometricScale(20, samplerate.nyquist, 0.175, 64)
    dims = [
        TimeDimension(
            frequency=half_lapped.frequency,
            duration=half_lapped.duration),
        FrequencyDimension(scale),
    ]
    arr = ArrayWithUnits(np.zeros((99, 64)), dims)

    rotated = np.rot90(arr)

    self.assertEqual((64, 99), rotated.shape)
    self.assertEqual(arr.dimensions[0], rotated.dimensions[1])
    self.assertEqual(arr.dimensions[1], rotated.dimensions[0])
def test_can_invert_categorical_distribution(self):
    """``inverse_categorical`` restores shape, type and dimensions."""
    mu = 255
    samplerate = SR11025()
    samples = SineSynthesizer(samplerate).synthesize(
        Seconds(4), [220, 440, 880])

    window = TimeSlice(duration=samplerate.frequency * 512)
    step = TimeSlice(duration=samplerate.frequency * 256)
    _, windowed = samples.sliding_window_with_leftovers(window, step)

    inverted = inverse_categorical(categorical(windowed, mu=mu), mu=mu)

    self.assertEqual(windowed.shape, inverted.shape)
    self.assertIsInstance(inverted, ArrayWithUnits)
    self.assertSequenceEqual(windowed.dimensions, inverted.dimensions)
def test_hamming_db_is_initialized_if_docs_exist(self):
    """A second index over processed documents has a ready hamming db."""
    settings = self._settings_with_event_log()
    Model = self._model(slice_size=128, settings=settings)
    first_index = self._index(Model, Model.sliced)

    synth = SineSynthesizer(SR11025())
    signal = synth.synthesize(Seconds(5), [220, 440, 880])
    Model.process(meta=signal.encode())
    first_index._synchronously_process_events()

    # Created only after the first index has processed its events.
    index2 = self._index(Model, Model.sliced)

    self.assertIsNotNone(index2.hamming_db)
    self.assertEqual(16, index2.hamming_db.code_size)
def test_can_roundtrip_query(self):
    """Decoding then re-encoding a search query returns the same query."""
    settings = self._settings_with_event_log()
    Model = self._model(slice_size=128, settings=settings)
    index = self._index(Model, Model.sliced)

    synth = SineSynthesizer(SR11025())
    signal = synth.synthesize(Seconds(5), [220, 440, 880])
    Model.process(meta=signal.encode())
    index._synchronously_process_events()

    query = index.random_search(n_results=5).query
    round_tripped = index.encode_query(index.decode_query(query))

    self.assertEqual(query, round_tripped)
def test_can_apply_sliding_window(self):
    """A (time, frequency) sliding window over (99, 64) gives (1, 64, 64)."""
    samplerate = SR11025()
    half_lapped = samplerate.half_lapped()
    scale = GeometricScale(20, samplerate.nyquist, 0.175, 64)
    time_dim = TimeDimension(
        frequency=half_lapped.frequency,
        duration=half_lapped.duration)
    freq_dim = FrequencyDimension(scale)
    arr = ArrayWithUnits(np.zeros((99, 64)), [time_dim, freq_dim])

    time_window = TimeSlice(duration=half_lapped.frequency * 64)
    freq_window = FrequencyBand(0, samplerate.nyquist)
    windowed = arr.sliding_window((time_window, freq_window))

    self.assertEqual((1, 64, 64), windowed.shape)
def test_can_add_additional_data_to_index(self):
    """Extra keyword callables given to the index appear in results.

    The index is built with a ``web_url`` callable that reads
    ``doc.meta['web_url']``; the first search result must carry the
    processed document's id and the url supplied in the metadata.
    """
    settings = self._settings_with_event_log()
    Model = self._model(slice_size=128, settings=settings)
    index = self._index(
        Model,
        Model.sliced,
        web_url=lambda doc, ts: doc.meta['web_url'])

    synth = SineSynthesizer(SR11025())
    signal = synth.synthesize(Seconds(5), [220, 440, 880])
    meta = AudioMetaData(
        uri=signal.encode(),
        web_url='https://example.com')
    _id = Model.process(meta=meta)
    index._synchronously_process_events()

    first_result = list(index.random_search(n_results=5))[0]
    result_id, _ts, extra_data = first_result

    self.assertEqual(_id, result_id)
    self.assertEqual('https://example.com', extra_data['web_url'])
def setUp(self):
    """Create the fixtures ``self.samplerate``, ``self.audio``, ``self.doc``.

    ``self.doc`` is a processed in-memory document with a half-lapped
    sliding window (not stored) and a stored MDCT feature, computed from
    five seconds of sine audio.
    """
    self.samplerate = SR11025()
    rs = resampled(resample_to=self.samplerate)
    wscheme = HalfLapped()

    @simple_in_memory_settings
    class Document(rs):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=wscheme,
            needs=rs.resampled,
            store=False)

        mdct = ArrayWithUnitsFeature(
            MDCT,
            needs=windowed,
            store=True)

    synth = SineSynthesizer(self.samplerate)
    self.audio = synth.synthesize(Seconds(5), [440., 660., 880.])
    self.doc = Document(Document.process(meta=self.audio.encode()))
def test_can_apply_sliding_windows_in_succession(self):
    """A sliding window can be applied to an already-windowed feature.

    The short windows must have trailing shape ``(512,)`` and the long
    windows built on top of them trailing shape ``(64, 512)``.
    """
    samplerate = SR11025()
    short_window = samplerate * (16, 512)
    # The long window is expressed in units of the short window's step.
    short_step = short_window.frequency
    long_window = SampleRate(
        frequency=short_step * 1,
        duration=short_step * 64)

    rs = resampled(resample_to=samplerate, store_resampled=True)
    samples = AudioSamples.silence(samplerate, Seconds(10))

    @simple_in_memory_settings
    class Sound(rs):
        short_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=short_window,
            needs=rs.resampled)

        long_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=long_window,
            needs=short_windowed)

    snd = Sound(Sound.process(meta=samples.encode()))

    self.assertEqual((512, ), snd.short_windowed.shape[1:])
    self.assertEqual((64, 512), snd.long_windowed.shape[1:])
def x(samples):
    """Run ``samples`` through a ``Resample`` built from SR44100 and SR11025.

    The resampler is called once with ``end_of_input=True`` and its result
    is returned.
    """
    first_rate = int(SR44100())
    second_rate = int(SR11025())
    resampler = Resample(first_rate, second_rate)
    return resampler(samples, end_of_input=True)
def test_multiplication_by_weighting_is_commutative(self):
    """``bank * AWeighting()`` is close to ``AWeighting() * bank``."""
    samplerate = SR11025()
    mel_scale = MelScale(FrequencyBand(20, samplerate.nyquist), 100)
    bank = fir_filter_bank(mel_scale, 256, samplerate, np.hanning(25))

    bank_times_weighting = bank * AWeighting()
    weighting_times_bank = AWeighting() * bank

    np.testing.assert_allclose(bank_times_weighting, weighting_times_bank)
def test_correct_window_and_step_size_at_11025(self):
    """Delegate to ``self._check`` with SR11025 and the values 512, 256."""
    samplerate = SR11025()
    self._check(samplerate, 512, 256)
def test_noise_synth_outputs_values_in_correct_range(self):
    """One second of noise has a negative minimum and a positive maximum."""
    synth = NoiseSynthesizer(SR11025())
    audio = synth.synthesize(Seconds(1))

    lowest = audio.min()
    self.assertLess(lowest, 0)

    highest = audio.max()
    self.assertGreater(highest, 0)
def test_silence_synthesizer_outputs_zero(self):
    """One second from ``SilenceSynthesizer`` is close to zero everywhere."""
    audio = SilenceSynthesizer(SR11025()).synthesize(Seconds(1))
    np.testing.assert_allclose(audio, 0)
def test_can_invert_fft_11025(self):
    """Delegate to ``self.can_invert_fft`` with an SR11025 sample rate."""
    samplerate = SR11025()
    self.can_invert_fft(samplerate)
def __init__(
        self,
        experiment_name,
        dataset,
        gan_pair,
        object_storage_username,
        object_storage_api_key,
        sound_cls=None,
        sound_feature=None,
        epochs=500,
        n_critic_iterations=10,
        batch_size=32,
        n_samples=int(5e5),
        latent_dim=100,
        real_sample_transformer=lambda x: x,
        preprocess_minibatch=lambda x: x,
        debug_gradients=False,
        sample_size=8192,
        sample_hop=1024,
        samplerate=SR11025(),
        app_port=8888,
        object_storage_region='DFW',
        app_secret=None):
    """Store the experiment configuration and build its sound/GAN models.

    Most arguments are copied onto same-named attributes. The
    ``object_storage_*`` arguments are not stored; they are only passed to
    ``object_store_pipeline_settings`` when the ``Gan`` pipeline class is
    declared.

    If ``sound_cls`` is falsy, a ``Sound`` class is created from
    ``windowed(...)`` with ``simple_lmdb_settings`` keyed by
    ``experiment_name``. ``sound_feature`` falls back to
    ``self.sound_cls.windowed`` when falsy.

    NOTE(review): the ``samplerate`` default is evaluated once, when the
    function is defined, so all calls relying on it share one instance.
    """
    super(GanExperiment, self).__init__()

    # Identity and serving options.
    self.experiment_name = experiment_name
    self.app_port = app_port
    self.app_secret = app_secret

    # Data and its preparation.
    self.dataset = dataset
    self.samplerate = samplerate
    self.sample_size = sample_size
    self.sample_hop = sample_hop
    self.n_samples = n_samples
    self.real_sample_transformer = real_sample_transformer
    self.preprocess_minibatch = preprocess_minibatch

    # Training options.
    self.gan_pair = gan_pair
    self.epochs = epochs
    self.batch_size = batch_size
    self.n_critic_iterations = n_critic_iterations
    self.latent_dim = latent_dim
    self.debug_gradients = debug_gradients

    if not sound_cls:
        base_model = windowed(
            resample_to=self.samplerate,
            store_resampled=True,
            wscheme=self.samplerate * (sample_hop, sample_size))

        @simple_lmdb_settings(
            experiment_name, map_size=1e11, user_supplied_id=True)
        class Sound(base_model):
            pass

        self.sound_cls = Sound
    else:
        self.sound_cls = sound_cls

    self.sound_feature = sound_feature or self.sound_cls.windowed

    pipeline_name = 'Gan-{experiment_name}'.format(
        experiment_name=experiment_name)

    @object_store_pipeline_settings(
        pipeline_name,
        object_storage_region,
        object_storage_username,
        object_storage_api_key)
    @infinite_streaming_learning_pipeline
    class Gan(ff.BaseModel):
        scaled = ff.PickleFeature(InstanceScaling)

        wgan = ff.PickleFeature(
            PyTorchGan,
            trainer=ff.Var('trainer'),
            needs=scaled)

    self.gan_pipeline = Gan()
    self.fake_samples = None
    self.app = None
def test_correct_output_with_stereo(self):
    """One second of stereo SR44100 silence resamples to shape (11025, 2)."""
    mono = SilenceSynthesizer(SR44100()).synthesize(Seconds(1))
    samples = mono.stereo

    resampler = Resample(
        int(samples.samplerate),
        int(SR11025()),
        nchannels=2)
    output = resampler(samples, end_of_input=True)

    self.assertEqual((11025, 2), output.shape)