def test_can_invert_array_with_units(self):
    # Build a 3-axis training array: (identity, time, frequency).
    time_dim = TimeDimension(Seconds(1))
    freq_dim = FrequencyDimension(LinearScale(FrequencyBand(0, 20000), 100))
    dims = [IdentityDimension(), time_dim, freq_dim]
    training = ArrayWithUnits(np.zeros((10, 5, 100)), dims)

    Model = self.get_model(slicex=FrequencyBand(1000, 10000))
    _id = Model.process(sliced=training)
    model = Model(_id)

    data = ArrayWithUnits(np.ones((2, 5, 100)), dims)
    inverted = model.pipeline.transform(data).inverse_transform()

    # The round trip must restore the shape and every dimension.
    self.assertEqual((2, 5, 100), inverted.shape)
    self.assertEqual(IdentityDimension(), inverted.dimensions[0])
    self.assertEqual(time_dim, inverted.dimensions[1])
    self.assertEqual(freq_dim, inverted.dimensions[2])
def test_can_apply_sliding_window_to_time_frequency_representation(self):
    scale = LinearScale(FrequencyBand(0, 22000), 100)
    tf = ArrayWithUnits(
        np.zeros((200, 100)),
        [TimeDimension(Seconds(1)), FrequencyDimension(scale)])
    wscheme = SampleRate(Seconds(2), Seconds(2))

    @simple_in_memory_settings
    class Document(BaseModel):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow, wscheme=wscheme, store=True)

    result = Document(Document.process(windowed=tf)).windowed

    self.assertIsInstance(result, ArrayWithUnits)
    self.assertEqual((100, 2, 100), result.shape)
    self.assertEqual(3, len(result.dimensions))
    # Outer axis: one step per two-second window.
    self.assertIsInstance(result.dimensions[0], TimeDimension)
    self.assertEqual(Seconds(2), result.dimensions[0].frequency)
    # Middle axis: frames inside each window keep the source rate.
    self.assertIsInstance(result.dimensions[1], TimeDimension)
    self.assertEqual(Seconds(1), result.dimensions[1].frequency)
    self.assertIsInstance(result.dimensions[2], FrequencyDimension)
def test_can_round_trip_3d_constant_rate_time_series_with_frequency_dim(
        self):
    outer_td = TimeDimension(Seconds(2), Milliseconds(1000))
    inner_td = TimeDimension(Seconds(1), Milliseconds(500))
    scale = LinearScale(FrequencyBand(20, 20000), 100)
    raw = np.random.random_sample((5, 2, 100))
    ts = ArrayWithUnits(
        raw, (outer_td, inner_td, FrequencyDimension(scale)))

    decoded = self._roundtrip(ts)

    self.assertIsInstance(decoded, ArrayWithUnits)
    self.assertEqual(3, len(decoded.dimensions))
    first, second, third = decoded.dimensions
    # Both time axes keep their frequency and duration.
    self.assertIsInstance(first, TimeDimension)
    self.assertEqual(Seconds(2), first.frequency)
    self.assertEqual(Milliseconds(1000), first.duration)
    self.assertIsInstance(second, TimeDimension)
    self.assertEqual(Seconds(1), second.frequency)
    self.assertEqual(Milliseconds(500), second.duration)
    # The frequency axis keeps its scale.
    self.assertIsInstance(third, FrequencyDimension)
    self.assertEqual(scale, third.scale)
    # The raw values survive unchanged.
    np.testing.assert_allclose(decoded, raw)
def test_can_round_trip_specific_scale_type(self):
    scale = LinearScale(FrequencyBand(20, 20000), 50)
    encoded = self.encoder.encode(FrequencyDimension(scale))
    decoded = self.decoder.decode(encoded)
    # The concrete scale subclass must survive the round trip.
    self.assertIsInstance(decoded.scale, LinearScale)
    self.assertEqual(scale, decoded.scale)
def test_raises_when_scale_factors_is_not_a_collection_or_float(self):
    samplerate = SR22050()
    scale = MelScale(FrequencyBand(1, samplerate.nyquist), 512)
    # Neither a sequence nor a float — must be rejected.
    bad_factors = object()
    self.assertRaises(
        TypeError,
        lambda: morlet_filter_bank(samplerate, 512, scale, bad_factors))
def test_can_round_trip_linear_scale(self):
    original = LinearScale(FrequencyBand(20, 4000), n_bands=100)
    codec = LinearScaleEncoderDecoder()
    # The codec must recognize both the scale and its encoded form.
    self.assertTrue(codec.can_encode(original))
    encoded = codec.encode(original)
    self.assertTrue(codec.can_decode(encoded))
    self.assertEqual(original, codec.decode(encoded))
def test_raises_when_scale_factors_length_does_not_match_scale(self):
    samplerate = SR22050()
    scale = MelScale(FrequencyBand(1, samplerate.nyquist), 512)
    # Deliberately provide half as many factors as there are bands.
    mismatched = np.linspace(0.1, 1.0, len(scale) // 2)
    self.assertRaises(
        ValueError,
        lambda: morlet_filter_bank(samplerate, 512, scale, mismatched))
def test_can_round_trip_mixed_dimensions(self):
    # One of each dimension flavor in a single sequence.
    dims = [
        IdentityDimension(),
        TimeDimension(Seconds(1), Milliseconds(500)),
        FrequencyDimension(LinearScale(FrequencyBand(100, 1000), 10))
    ]
    self.assertSequenceEqual(dims, self.roundtrip(dims))
def test_can_assign_to_multi_band_frequency_slice(self):
    time_dim = TimeDimension(frequency=Seconds(1))
    scale = GeometricScale(20, 5000, 0.05, 10)
    # Bands of increasing width: 1, 2, ..., 10 samples.
    bands = [np.zeros((10, width)) for width in xrange(1, 11)]
    fa = FrequencyAdaptive(bands, time_dim, scale)

    target = FrequencyBand(300, 3030)
    fa[:, target] = 1

    # Assignment through the band must hit exactly the cells that the
    # equivalent integer-based slice addresses.
    int_slice = fa.dimensions[1].integer_based_slice(target)
    np.testing.assert_allclose(fa[:, int_slice], 1)
def audio_graph(
        chunksize_bytes=DEFAULT_CHUNK_SIZE,
        resample_to=SR44100(),
        store_fft=False):
    """
    Produce a base class suitable as a starting point for many audio
    processing pipelines.  This class resamples all audio to a common
    sampling rate, and produces a bark band spectrogram from overlapping
    short-time fourier transform frames.  It also compresses the audio into
    ogg vorbis format for compact storage.

    :param chunksize_bytes: how many bytes are pulled from the raw byte
        stream per chunk
    :param resample_to: the common sample rate every input is resampled to
        (note: the default ``SR44100()`` is evaluated once, at definition
        time, and shared across calls)
    :param store_fft: when True, persist the raw FFT frames in addition to
        the derived features
    :return: the generated ``AudioGraph`` class
    """
    # Frequency range shared by the bark and chroma features; the upper
    # bound follows the chosen sample rate's nyquist frequency.
    band = FrequencyBand(20, resample_to.nyquist)

    class AudioGraph(BaseModel):
        # Metadata about the source audio (stored as JSON).
        meta = JSONFeature(
            MetaData, store=True, encoder=AudioMetaDataEncoder)

        # Raw bytes pulled from the source, chunked for streaming.
        raw = ByteStreamFeature(
            ByteStream,
            chunksize=chunksize_bytes,
            needs=meta,
            store=False)

        # Compact ogg vorbis copy of the audio, kept for storage.
        ogg = OggVorbisFeature(OggVorbis, needs=raw, store=True)

        # Decoded PCM samples.
        pcm = AudioSamplesFeature(AudioStream, needs=raw, store=False)

        # All audio normalized to the common sample rate.
        resampled = AudioSamplesFeature(
            Resampler,
            needs=pcm,
            samplerate=resample_to,
            store=False)

        # Overlapping, windowed frames feeding the spectral features.
        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            needs=resampled,
            wscheme=HalfLapped(),
            wfunc=OggVorbisWindowingFunc(),
            store=False)

        dct = ArrayWithUnitsFeature(DCT, needs=windowed, store=True)

        # Raw FFT frames are only persisted on request (store_fft).
        fft = ArrayWithUnitsFeature(FFT, needs=windowed, store=store_fft)

        # Perceptually-motivated spectral features derived from the FFT.
        bark = ArrayWithUnitsFeature(
            BarkBands, needs=fft, frequency_band=band, store=True)

        centroid = ArrayWithUnitsFeature(
            SpectralCentroid, needs=bark, store=True)

        chroma = ArrayWithUnitsFeature(
            Chroma, needs=fft, frequency_band=band, store=True)

        bfcc = ArrayWithUnitsFeature(BFCC, needs=fft, store=True)

    return AudioGraph
def test_maintains_array_with_units_dimensions(self):
    # Train a small autoencoder for a couple of epochs on random data.
    trainer = SupervisedTrainer(
        AutoEncoder(),
        loss=nn.MSELoss(),
        optimizer=lambda model: SGD(model.parameters(), lr=0.1),
        epochs=2,
        batch_size=64,
        checkpoint_epochs=2)

    @simple_in_memory_settings
    class Pipeline(ff.BaseModel):
        inp = ff.PickleFeature(
            ff.IteratorNode,
            store=False)

        # Collect a shuffled sample of the incoming chunks.
        samples = ff.PickleFeature(
            ShuffledSamples,
            nsamples=500,
            dtype=np.float32,
            needs=inp,
            store=False)

        # Give each example unit norm before it reaches the network.
        unitnorm = ff.PickleFeature(
            UnitNorm,
            needs=samples,
            store=False)

        network = ff.PickleFeature(
            PyTorchAutoEncoder,
            trainer=trainer,
            needs=unitnorm,
            store=False)

        # Only the assembled preprocessing pipeline is persisted.
        pipeline = ff.PickleFeature(
            PreprocessingPipeline,
            needs=(unitnorm, network),
            store=True)

    training = np.random.random_sample((1000, 3))

    def gen(chunksize, s):
        # Feed the training data to the pipeline in fixed-size chunks.
        for i in xrange(0, len(s), chunksize):
            yield s[i:i + chunksize]

    _id = Pipeline.process(inp=gen(100, training))
    pipe = Pipeline(_id)

    # A time-frequency input: the forward transform should keep the time
    # axis but collapse the frequency axis to an identity dimension.
    test = ArrayWithUnits(
        np.random.random_sample((10, 3)).astype(np.float32),
        dimensions=[
            TimeDimension(Seconds(1)),
            FrequencyDimension(
                LinearScale(FrequencyBand(100, 1000), 3))
        ])
    result = pipe.pipeline.transform(test)
    self.assertEqual((10, 2), result.data.shape)
    self.assertIsInstance(result.data, ArrayWithUnits)
    self.assertIsInstance(result.data.dimensions[0], TimeDimension)
    self.assertIsInstance(result.data.dimensions[1], IdentityDimension)

    # The inverse transform restores the original shape and both of the
    # original dimension types.
    inverted = result.inverse_transform()
    self.assertEqual((10, 3), inverted.shape)
    self.assertIsInstance(inverted, ArrayWithUnits)
    self.assertIsInstance(inverted.dimensions[0], TimeDimension)
    self.assertIsInstance(inverted.dimensions[1], FrequencyDimension)
def test_1d_phase_shift_returns_correct_size(self):
    samplerate = SR22050()
    signal = SineSynthesizer(samplerate).synthesize(
        Milliseconds(5500), [220, 440, 880])
    coeffs = fft(signal)
    shifted = phase_shift(
        coeffs=coeffs,
        samplerate=samplerate,
        time_shift=Milliseconds(5500),
        frequency_band=FrequencyBand(50, 5000))
    # Phase shifting never changes the number of coefficients.
    self.assertEqual(coeffs.shape, shifted.shape)
def test_inversion_returns_time_frequency_representation(self):
    raw = np.random.random_sample((33, 30))
    scale = LinearScale(FrequencyBand(20, 20000), 30)
    tf = ArrayWithUnits(
        raw,
        [TimeDimension(Seconds(1), Seconds(2)), FrequencyDimension(scale)])

    inverted = self.invert_and_assert_class(tf)

    # Inversion preserves the time axis parameters and the scale.
    self.assertEqual(Seconds(1), inverted.dimensions[0].frequency)
    self.assertEqual(Seconds(2), inverted.dimensions[0].duration)
    self.assertEqual(scale, inverted.dimensions[1].scale)
def test_has_correct_sample_rate(self):
    wscheme = HalfLapped()
    synth = DCTSynthesizer()
    raw = np.zeros((100, 2048))
    scale = LinearScale(FrequencyBand(0, SR44100().nyquist), raw.shape[1])
    coeffs = ArrayWithUnits(
        raw, [TimeDimension(*wscheme), FrequencyDimension(scale)])

    output = synth.synthesize(coeffs)

    # The synthesized audio must come back at the half-lapped scheme's
    # underlying sample rate.
    self.assertIsInstance(output.samplerate, SR44100)
    self.assertIsInstance(output, AudioSamples)
def test_dimensions_are_correct(self):
    samplerate = SR22050()
    scale = MelScale(FrequencyBand(1, samplerate.nyquist), 128)
    factors = np.linspace(0.1, 1.0, len(scale))

    bank = morlet_filter_bank(samplerate, 512, scale, factors)

    # One row per scale band, one column per sample.
    self.assertEqual((128, 512), bank.shape)
    self.assertEqual(FrequencyDimension(scale), bank.dimensions[0])
    self.assertEqual(TimeDimension(*samplerate), bank.dimensions[1])
def test_filters_are_normalized(self):
    samplerate = SR22050()
    scale = MelScale(FrequencyBand(1, samplerate.nyquist), 128)
    factors = np.linspace(0.1, 1.0, len(scale))
    bank = morlet_filter_bank(
        samplerate, 512, scale, factors, normalize=True)
    # With normalize=True every filter must have unit l2 norm.
    np.testing.assert_allclose(
        np.linalg.norm(bank, axis=-1), 1.0, rtol=1e-6)
def test_can_phase_shift_1d_signal_180_degrees(self):
    samplerate = SR22050()
    signal = SineSynthesizer(samplerate).synthesize(
        Seconds(1), [110, 220, 440, 880])
    coeffs = fft(signal)
    # Shifting backwards by the full one-second duration should wrap
    # around and (approximately) reproduce the original samples.
    shifted = phase_shift(
        coeffs=coeffs,
        samplerate=samplerate,
        time_shift=-Milliseconds(1000),
        frequency_band=FrequencyBand(50, 5000))
    recovered = np.fft.irfft(shifted, norm='ortho')
    self.assertAlmostEqual(
        0, self._mean_squared_error(signal, recovered), 1)
def test_can_apply_frequency_slice_across_multiple_bands(self):
    time_dim = TimeDimension(frequency=Seconds(1))
    scale = GeometricScale(20, 5000, 0.05, 10)
    # Bands of increasing width: 1, 2, ..., 10 samples.
    bands = [np.zeros((10, width)) for width in xrange(1, 11)]
    fa = FrequencyAdaptive(bands, time_dim, scale)

    sliced = fa[:, FrequencyBand(300, 3030)]

    self.assertIsInstance(sliced, ArrayWithUnits)
    # The time axis is untouched; the frequency axis becomes explicit
    # because the slice spans bands of differing resolution.
    self.assertEqual(time_dim, sliced.dimensions[0])
    self.assertIsInstance(sliced.dimensions[1], ExplicitFrequencyDimension)
    self.assertIsInstance(sliced.dimensions[1].scale, ExplicitScale)
def test_2d_phase_shift_returns_correct_shape(self):
    samplerate = SR22050()
    signal = SineSynthesizer(samplerate).synthesize(
        Milliseconds(2500), [220, 440, 880])
    # Window into overlapping frames before taking the FFT.
    _, windowed = signal.sliding_window_with_leftovers(
        windowsize=TimeSlice(duration=Milliseconds(200)),
        stepsize=TimeSlice(duration=Milliseconds(100)),
        dopad=True)
    coeffs = fft(windowed)
    shifted = phase_shift(
        coeffs=coeffs,
        samplerate=samplerate,
        time_shift=Milliseconds(40),
        frequency_band=FrequencyBand(50, 5000))
    # Phase shifting a 2d batch of frames preserves its shape.
    self.assertEqual(coeffs.shape, shifted.shape)
def test_should_preserve_time_dimension_in_forward_transform(self):
    outer_td = TimeDimension(Seconds(1))
    inner_td = TimeDimension(Milliseconds(500))
    freq_dim = FrequencyDimension(LinearScale(FrequencyBand(10, 100), 3))

    training = ArrayWithUnits(
        np.zeros((10, 5, 3)),
        dimensions=(IdentityDimension(), inner_td, freq_dim))
    doc = Document(Document.process(l=training))

    test_data = ArrayWithUnits(
        np.zeros((11, 5, 3)), dimensions=(outer_td, inner_td, freq_dim))
    result = doc.pipeline.transform(test_data)

    # The leading time axis survives; the flattened remainder collapses
    # into a single identity dimension.
    self.assertEqual((11, 15), result.data.shape)
    self.assertIsInstance(result.data, ArrayWithUnits)
    self.assertEqual(outer_td, result.data.dimensions[0])
    self.assertEqual(IdentityDimension(), result.data.dimensions[1])
def test_array_with_units(self):
    reservoir = Reservoir(100)
    freq_dim = FrequencyDimension(
        LinearScale(FrequencyBand(100, 1000), 100))
    samples = ArrayWithUnits(
        np.ones((20, 100)),
        [TimeDimension(frequency=Seconds(1)), freq_dim])

    reservoir.add(samples)
    mixed = reservoir.get()

    self.assertIsInstance(mixed, ArrayWithUnits)
    self.assertEqual(100, mixed.shape[1])
    # Sampling scrambles temporal order, so the first axis degrades to an
    # identity dimension while the frequency axis is preserved.
    self.assertIsInstance(mixed.dimensions[0], IdentityDimension)
    self.assertIsInstance(mixed.dimensions[1], FrequencyDimension)
def test_can_sample_from_one_dimensional_feature(self):
    sampler = ReservoirSampler(nsamples=10)
    freq_dim = FrequencyDimension(
        LinearScale(FrequencyBand(100, 1000), 100))
    samples = ArrayWithUnits(
        np.ones((20, 100)),
        [TimeDimension(frequency=Seconds(1)), freq_dim])

    sampler._enqueue(samples, pusher=None)
    reservoir = sampler._r

    self.assertEqual((10, 100), reservoir.shape)
    self.assertIsInstance(reservoir, ArrayWithUnits)
    # Sampling drops temporal ordering but keeps the frequency axis.
    self.assertEqual(reservoir.dimensions[0], IdentityDimension())
    self.assertEqual(reservoir.dimensions[1], freq_dim)
def test_should_restore_all_dimensions_in_backward_transform(self):
    outer_td = TimeDimension(Seconds(1))
    inner_td = TimeDimension(Milliseconds(500))
    freq_dim = FrequencyDimension(LinearScale(FrequencyBand(10, 100), 3))

    training = ArrayWithUnits(
        np.zeros((10, 5, 3)),
        dimensions=(IdentityDimension(), inner_td, freq_dim))
    doc = Document(Document.process(l=training))

    test_data = ArrayWithUnits(
        np.zeros((11, 5, 3)), dimensions=(outer_td, inner_td, freq_dim))
    inverted = doc.pipeline.transform(test_data).inverse_transform()

    # The backward transform restores the original shape and all three
    # original dimensions.
    self.assertEqual((11, 5, 3), inverted.shape)
    self.assertIsInstance(inverted, ArrayWithUnits)
    self.assertEqual(outer_td, inverted.dimensions[0])
    self.assertEqual(inner_td, inverted.dimensions[1])
    self.assertEqual(freq_dim, inverted.dimensions[2])
def test_sliding_window_maintains_dtype(self):
    scale = LinearScale(FrequencyBand(0, 22000), 100)
    arr = ArrayWithUnits(
        np.zeros((200, 100), dtype=np.uint8),
        [TimeDimension(Seconds(1)), FrequencyDimension(scale)])
    wscheme = SampleRate(Seconds(2), Seconds(2))

    @simple_in_memory_settings
    class Document(BaseModel):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow, wscheme=wscheme, store=True)

    result = Document(Document.process(windowed=arr)).windowed
    # Windowing must not silently promote the input dtype.
    self.assertEqual(np.uint8, result.dtype)
def test_can_dequeue_when_reservoir_is_partially_full(self):
    sampler = ReservoirSampler(nsamples=10)
    freq_dim = FrequencyDimension(
        LinearScale(FrequencyBand(100, 1000), 100))
    samples = ArrayWithUnits(
        np.ones((4, 10, 100)),
        [TimeDimension(frequency=Seconds(10)),
         TimeDimension(frequency=Seconds(1)),
         freq_dim])

    # Only 4 examples enqueued against a capacity of 10.
    sampler._enqueue(samples, pusher=None)
    reservoir = sampler._dequeue()

    self.assertEqual((4, 10, 100), reservoir.shape)
    self.assertIsInstance(reservoir, ArrayWithUnits)
    # The leading axis degrades to identity; the inner axes survive.
    self.assertEqual(reservoir.dimensions[0], IdentityDimension())
    self.assertEqual(reservoir.dimensions[1], samples.dimensions[1])
    self.assertEqual(reservoir.dimensions[2], samples.dimensions[2])
def test_forward_transform_returns_array_with_units_where_possible(self):
    # Train the model on plain (unitless) random data.
    training = np.random.random_sample((100, 30))
    Model = self.get_model()
    model = Model(Model.process(unitnorm=training))

    # Build a time-frequency representation to push through the model.
    scale = LinearScale(FrequencyBand(20, 20000), 30)
    tf = ArrayWithUnits(
        np.random.random_sample((10, 30)),
        [TimeDimension(Seconds(1)), FrequencyDimension(scale)])

    transformed = model.pipeline.transform(tf).data

    # Units survive the forward pass: time stays, frequency collapses.
    self.assertIsInstance(transformed, ArrayWithUnits)
    self.assertEqual(2, len(transformed.dimensions))
    self.assertIsInstance(transformed.dimensions[0], TimeDimension)
    self.assertIsInstance(transformed.dimensions[1], IdentityDimension)
def test_can_maintain_array_dimensions_with_supervised_learning(self):
    # Train a binary classifier; the interesting assertions are about the
    # dimensions that survive the pipeline's forward transform.
    trainer = SupervisedTrainer(
        model=SupervisedNetwork(),
        loss=nn.BCELoss(),
        optimizer=lambda model: SGD(model.parameters(), lr=0.2),
        epochs=1,
        batch_size=64,
        data_preprocessor=lambda x: x.astype(np.float32),
        label_preprocessor=lambda x: x.astype(np.float32))

    @simple_in_memory_settings
    class Pipeline(ff.BaseModel):
        inp = ff.PickleFeature(
            ff.IteratorNode,
            store=False)

        # Shuffled (data, labels) pairs drawn from the input stream.
        samples = ff.PickleFeature(
            ShuffledSamples,
            nsamples=500,
            multiplexed=True,
            dtype=np.float32,
            needs=inp,
            store=False)

        # Normalize the data aspect; binarize the label aspect.
        unitnorm = ff.PickleFeature(
            UnitNorm,
            needs=samples.aspect('data'),
            store=False)

        hard_labels = ff.PickleFeature(
            Binarize,
            needs=samples.aspect('labels'),
            store=False)

        network = ff.PickleFeature(
            PyTorchNetwork,
            trainer=trainer,
            needs=dict(data=unitnorm, labels=hard_labels),
            store=False)

        pipeline = ff.PickleFeature(
            PreprocessingPipeline,
            needs=(unitnorm, network),
            store=True)

    # Produce some random points on the unit circle
    samples = np.random.random_sample((1000, 2))
    samples /= np.linalg.norm(samples, axis=1, keepdims=True)

    # a line extending from the origin to (1, 1)
    origin = np.array([0, 0])
    unit = np.array([1, 1])

    # which side of the plane is each sample on?
    labels = np.sign(np.cross(unit - origin, origin - samples))
    labels[labels < 0] = 0

    # scale each sample randomly, forcing the pipeline to normalize data
    factors = np.random.randint(1, 1000, (len(samples), 1))
    scaled_samples = samples * factors

    # fuzz the labels, forcing the pipeline to binarize these (i.e., force
    # them to be 0 or 1)
    fuzzed_labels = labels + np.random.normal(0, 0.1, labels.shape)
    fuzzed_labels = fuzzed_labels[..., None]

    def gen(chunksize, s, l):
        # Stream aligned (data, labels) chunks into the pipeline.
        for i in xrange(0, len(s), chunksize):
            sl = slice(i, i + chunksize)
            yield dict(data=s[sl], labels=l[sl])

    _id = Pipeline.process(inp=gen(100, scaled_samples, fuzzed_labels))
    pipe = Pipeline(_id)

    # produce some new samples on the unit circle
    new_samples = np.random.random_sample((1000, 2))
    # Fixed: normalize the new samples by their *own* norms (previously
    # this divided by the norms of the training `samples`).
    new_samples /= np.linalg.norm(new_samples, axis=1, keepdims=True)

    # scale each example randomly, so the pipeline must give it unit norm
    # to arrive at the correct answer
    new_factors = np.random.randint(1, 1000, (len(new_samples), 1))
    new_scaled_samples = new_factors * new_samples

    arr = ArrayWithUnits(
        new_scaled_samples,
        dimensions=[
            TimeDimension(Seconds(1)),
            FrequencyDimension(
                LinearScale(FrequencyBand(100, 1000), 2))
        ])
    result = pipe.pipeline.transform(arr.astype(np.float32))

    # The time axis must survive the forward transform.
    self.assertIsInstance(result.data, ArrayWithUnits)
    self.assertIsInstance(result.data.dimensions[0], TimeDimension)
from zounds.timeseries import TimeSlice, AudioSamples, SR44100, HalfLapped, \ Seconds, Milliseconds, Stride from zounds.persistence import ArrayWithUnitsFeature, AudioSamplesFeature from zounds.soundfile import \ AudioStream, OggVorbis, OggVorbisFeature, Resampler from zounds.spectral import \ SlidingWindow, OggVorbisWindowingFunc, FFT, Chroma, BarkBands, BFCC, \ FrequencyBand from zounds.basic import Max from zounds.util import simple_in_memory_settings from featureflow import * windowing_scheme = HalfLapped() samplerate = SR44100() band = FrequencyBand(20, samplerate.nyquist) @simple_in_memory_settings class Document(BaseModel): raw = ByteStreamFeature( ByteStream, chunksize=2 * 44100 * 30 * 2, store=True) ogg = OggVorbisFeature( OggVorbis, needs=raw, store=True) pcm = AudioSamplesFeature(
def decode(self, d):
    """Rebuild a LinearScale from its dict (JSON-style) representation."""
    return LinearScale(
        FrequencyBand(d['start_hz'], d['stop_hz']),
        d['n_bands'],
        always_even=d['always_even'])
def test_raises_when_encountering_unknown_scale(self):
    # The base FrequencyScale class has no registered codec, so encoding
    # a dimension built on it must fail loudly.
    dim = FrequencyDimension(FrequencyScale(FrequencyBand(20, 20000), 50))
    self.assertRaises(
        NotImplementedError, lambda: self.encoder.encode(dim))