def test_can_apply_sliding_window_to_time_frequency_representation(self):
    # A (200, 100) time-frequency array is windowed with a
    # SampleRate(Seconds(2), Seconds(2)) scheme; the stored feature should
    # come back as a 3D ArrayWithUnits with two time axes followed by the
    # original frequency axis.
    linear_scale = LinearScale(FrequencyBand(0, 22000), 100)
    input_dimensions = [
        TimeDimension(Seconds(1)),
        FrequencyDimension(linear_scale),
    ]
    spectrogram = ArrayWithUnits(np.zeros((200, 100)), input_dimensions)
    window_scheme = SampleRate(Seconds(2), Seconds(2))

    @simple_in_memory_settings
    class Document(BaseModel):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=window_scheme,
            store=True)

    doc_id = Document.process(windowed=spectrogram)
    windowed = Document(doc_id).windowed

    self.assertIsInstance(windowed, ArrayWithUnits)
    self.assertEqual((100, 2, 100), windowed.shape)
    self.assertEqual(3, len(windowed.dimensions))

    # outer time axis: one step per window
    self.assertIsInstance(windowed.dimensions[0], TimeDimension)
    self.assertEqual(Seconds(2), windowed.dimensions[0].frequency)

    # inner time axis: the original one-second frames
    self.assertIsInstance(windowed.dimensions[1], TimeDimension)
    self.assertEqual(Seconds(1), windowed.dimensions[1].frequency)

    self.assertIsInstance(windowed.dimensions[2], FrequencyDimension)
def test_can_round_trip_3d_constant_rate_time_series_with_frequency_dim(
        self):
    # Round-trip a (5, 2, 100) array with two time axes and one frequency
    # axis, then check that every dimension and the raw values survive.
    scale = LinearScale(FrequencyBand(20, 20000), 100)
    # (frequency, duration) expected for the two leading time axes
    expected_time_axes = [
        (Seconds(2), Milliseconds(1000)),
        (Seconds(1), Milliseconds(500)),
    ]
    dimensions = (
        TimeDimension(Seconds(2), Milliseconds(1000)),
        TimeDimension(Seconds(1), Milliseconds(500)),
        FrequencyDimension(scale),
    )
    raw = np.random.random_sample((5, 2, 100))

    decoded = self._roundtrip(ArrayWithUnits(raw, dimensions))

    self.assertIsInstance(decoded, ArrayWithUnits)
    self.assertEqual(3, len(decoded.dimensions))

    for axis, (frequency, duration) in enumerate(expected_time_axes):
        time_dim = decoded.dimensions[axis]
        self.assertIsInstance(time_dim, TimeDimension)
        self.assertEqual(frequency, time_dim.frequency)
        self.assertEqual(duration, time_dim.duration)

    freq_dim = decoded.dimensions[2]
    self.assertIsInstance(freq_dim, FrequencyDimension)
    self.assertEqual(scale, freq_dim.scale)

    np.testing.assert_allclose(decoded, raw)
def test_can_round_trip_specific_scale_type(self):
    # Encoding then decoding a FrequencyDimension must hand back a scale of
    # the same concrete type (LinearScale) that compares equal to the
    # original.
    original_scale = LinearScale(FrequencyBand(20, 20000), 50)
    dimension = FrequencyDimension(original_scale)

    restored = self.decoder.decode(self.encoder.encode(dimension))

    self.assertIsInstance(restored.scale, LinearScale)
    self.assertEqual(original_scale, restored.scale)
def test_can_round_trip_mixed_dimensions(self):
    # A heterogeneous list of dimensions should come back element-for-element
    # equal after a round trip.
    identity = IdentityDimension()
    time = TimeDimension(Seconds(1), Milliseconds(500))
    frequency = FrequencyDimension(
        LinearScale(FrequencyBand(100, 1000), 10))
    original = [identity, time, frequency]

    restored = self.roundtrip(original)

    self.assertSequenceEqual(original, restored)
def test_can_round_trip_linear_scale(self):
    # The LinearScale codec must accept the scale, accept its own encoded
    # output, and decode to something equal to the input.
    codec = LinearScaleEncoderDecoder()
    scale = LinearScale(FrequencyBand(20, 4000), n_bands=100)

    self.assertTrue(codec.can_encode(scale))
    encoded = codec.encode(scale)

    self.assertTrue(codec.can_decode(encoded))
    decoded = codec.decode(encoded)

    self.assertEqual(scale, decoded)
def test_maintains_array_with_units_dimensions(self):
    # Train an autoencoder pipeline on plain numpy data, then check that an
    # ArrayWithUnits pushed through the learned pipeline keeps its time
    # dimension on the way forward and regains its frequency dimension on
    # the way back.
    trainer = SupervisedTrainer(
        AutoEncoder(),
        loss=nn.MSELoss(),
        optimizer=lambda model: SGD(model.parameters(), lr=0.1),
        epochs=2,
        batch_size=64,
        checkpoint_epochs=2)

    @simple_in_memory_settings
    class Pipeline(ff.BaseModel):
        inp = ff.PickleFeature(
            ff.IteratorNode,
            store=False)

        samples = ff.PickleFeature(
            ShuffledSamples,
            nsamples=500,
            dtype=np.float32,
            needs=inp,
            store=False)

        unitnorm = ff.PickleFeature(
            UnitNorm,
            needs=samples,
            store=False)

        network = ff.PickleFeature(
            PyTorchAutoEncoder,
            trainer=trainer,
            needs=unitnorm,
            store=False)

        pipeline = ff.PickleFeature(
            PreprocessingPipeline,
            needs=(unitnorm, network),
            store=True)

    training = np.random.random_sample((1000, 3))

    def gen(chunksize, s):
        # `range` rather than `xrange`: identical results here, and it does
        # not raise NameError on Python 3.
        for i in range(0, len(s), chunksize):
            yield s[i:i + chunksize]

    _id = Pipeline.process(inp=gen(100, training))
    pipe = Pipeline(_id)

    test = ArrayWithUnits(
        np.random.random_sample((10, 3)).astype(np.float32),
        dimensions=[
            TimeDimension(Seconds(1)),
            FrequencyDimension(LinearScale(FrequencyBand(100, 1000), 3))
        ])

    # forward pass: time axis preserved, feature axis becomes anonymous
    result = pipe.pipeline.transform(test)
    self.assertEqual((10, 2), result.data.shape)
    self.assertIsInstance(result.data, ArrayWithUnits)
    self.assertIsInstance(result.data.dimensions[0], TimeDimension)
    self.assertIsInstance(result.data.dimensions[1], IdentityDimension)

    # backward pass: original shape and both dimension types restored
    inverted = result.inverse_transform()
    self.assertEqual((10, 3), inverted.shape)
    self.assertIsInstance(inverted, ArrayWithUnits)
    self.assertIsInstance(inverted.dimensions[0], TimeDimension)
    self.assertIsInstance(inverted.dimensions[1], FrequencyDimension)
def test_inversion_returns_time_frequency_representation(self):
    # After inversion the time axis must keep its frequency and duration and
    # the frequency axis must keep its scale.
    scale = LinearScale(FrequencyBand(20, 20000), 30)
    dimensions = [
        TimeDimension(Seconds(1), Seconds(2)),
        FrequencyDimension(scale),
    ]
    tf = ArrayWithUnits(np.random.random_sample((33, 30)), dimensions)

    inverted = self.invert_and_assert_class(tf)

    time_dim = inverted.dimensions[0]
    self.assertEqual(Seconds(1), time_dim.frequency)
    self.assertEqual(Seconds(2), time_dim.duration)
    self.assertEqual(scale, inverted.dimensions[1].scale)
def test_has_correct_sample_rate(self):
    # Synthesizing 2048-bin, half-lapped frames whose scale tops out at the
    # SR44100 nyquist should yield AudioSamples tagged with SR44100.
    half_lapped = HalfLapped()
    synth = DCTSynthesizer()
    frames = np.zeros((100, 2048))
    scale = LinearScale(
        FrequencyBand(0, SR44100().nyquist), frames.shape[1])
    dimensions = [TimeDimension(*half_lapped), FrequencyDimension(scale)]

    output = synth.synthesize(ArrayWithUnits(frames, dimensions))

    self.assertIsInstance(output.samplerate, SR44100)
    self.assertIsInstance(output, AudioSamples)
def test_can_invert_array_with_units(self):
    # Train a model that slices along the frequency axis, transform a fresh
    # array, and check that inversion restores the full shape and all three
    # dimensions.
    time_dim = TimeDimension(Seconds(1))
    freq_dim = FrequencyDimension(
        LinearScale(FrequencyBand(0, 20000), 100))
    dimensions = [IdentityDimension(), time_dim, freq_dim]

    training = ArrayWithUnits(np.zeros((10, 5, 100)), dimensions)
    model_cls = self.get_model(slicex=FrequencyBand(1000, 10000))
    model = model_cls(model_cls.process(sliced=training))

    data = ArrayWithUnits(np.ones((2, 5, 100)), dimensions)
    inverted = model.pipeline.transform(data).inverse_transform()

    self.assertEqual((2, 5, 100), inverted.shape)
    self.assertEqual(IdentityDimension(), inverted.dimensions[0])
    self.assertEqual(time_dim, inverted.dimensions[1])
    self.assertEqual(freq_dim, inverted.dimensions[2])
def test_array_with_units(self):
    # Samples drawn back out of a Reservoir should still be an
    # ArrayWithUnits: the first axis becomes an IdentityDimension and the
    # frequency axis is kept.
    freq_dim = FrequencyDimension(
        LinearScale(FrequencyBand(100, 1000), 100))
    time_dim = TimeDimension(frequency=Seconds(1))
    samples = ArrayWithUnits(np.ones((20, 100)), [time_dim, freq_dim])

    reservoir = Reservoir(100)
    reservoir.add(samples)
    mixed = reservoir.get()

    self.assertIsInstance(mixed, ArrayWithUnits)
    self.assertEqual(100, mixed.shape[1])
    self.assertIsInstance(mixed.dimensions[0], IdentityDimension)
    self.assertIsInstance(mixed.dimensions[1], FrequencyDimension)
def test_can_sample_from_one_dimensional_feature(self):
    # Enqueue 20 frames into a 10-sample ReservoirSampler and inspect its
    # internal reservoir: shape (10, 100), identity first axis, original
    # frequency axis.
    freq_dim = FrequencyDimension(
        LinearScale(FrequencyBand(100, 1000), 100))
    time_dim = TimeDimension(frequency=Seconds(1))
    samples = ArrayWithUnits(np.ones((20, 100)), [time_dim, freq_dim])

    sampler = ReservoirSampler(nsamples=10)
    sampler._enqueue(samples, pusher=None)
    reservoir = sampler._r

    self.assertEqual((10, 100), reservoir.shape)
    self.assertIsInstance(reservoir, ArrayWithUnits)
    self.assertEqual(reservoir.dimensions[0], IdentityDimension())
    self.assertEqual(reservoir.dimensions[1], freq_dim)
def test_should_preserve_time_dimension_in_forward_transform(self):
    # Train on data whose first axis is an IdentityDimension, then transform
    # data whose first axis is a TimeDimension: the result should have shape
    # (11, 15), keep that time axis, and use an identity second axis.
    outer_time = TimeDimension(Seconds(1))
    inner_time = TimeDimension(Milliseconds(500))
    freq_dim = FrequencyDimension(LinearScale(FrequencyBand(10, 100), 3))

    training_data = ArrayWithUnits(
        np.zeros((10, 5, 3)),
        dimensions=(IdentityDimension(), inner_time, freq_dim))
    doc = Document(Document.process(l=training_data))

    test_data = ArrayWithUnits(
        np.zeros((11, 5, 3)),
        dimensions=(outer_time, inner_time, freq_dim))
    transformed = doc.pipeline.transform(test_data).data

    self.assertEqual((11, 15), transformed.shape)
    self.assertIsInstance(transformed, ArrayWithUnits)
    self.assertEqual(outer_time, transformed.dimensions[0])
    self.assertEqual(IdentityDimension(), transformed.dimensions[1])
def test_sliding_window_maintains_dtype(self):
    # A uint8 input must still be uint8 after being windowed and stored.
    linear_scale = LinearScale(FrequencyBand(0, 22000), 100)
    input_dimensions = [
        TimeDimension(Seconds(1)),
        FrequencyDimension(linear_scale),
    ]
    spectrogram = ArrayWithUnits(
        np.zeros((200, 100), dtype=np.uint8), input_dimensions)
    window_scheme = SampleRate(Seconds(2), Seconds(2))

    @simple_in_memory_settings
    class Document(BaseModel):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=window_scheme,
            store=True)

    doc_id = Document.process(windowed=spectrogram)
    windowed = Document(doc_id).windowed

    self.assertEqual(np.uint8, windowed.dtype)
def synthesize(self, freq_adaptive_coeffs):
    """
    Synthesize from frequency-adaptive coefficients.

    A zero-filled coefficient array on a linear frequency scale is built,
    sharing its first dimension with the input.  For every band in
    `self.scale`, the matching slice of the input is passed through
    `self.band_transform` and accumulated into the same slice of the
    linear-scale array.  The filled array is then handed to
    `self.short_time_synth.synthesize`, whose result is returned.
    """
    linear_scale = LinearScale.from_sample_rate(
        self.samplerate,
        self._n_linear_scale_bands(freq_adaptive_coeffs),
        always_even=self.scale_slices_always_even)

    shape = (len(freq_adaptive_coeffs), linear_scale.n_bands)
    dimensions = [
        freq_adaptive_coeffs.dimensions[0],
        FrequencyDimension(linear_scale),
    ]
    linear_coeffs = ArrayWithUnits(
        np.zeros(shape, dtype=self.coeffs_dtype),
        dimensions=dimensions)

    for band in self.scale:
        band_coeffs = self.band_transform(
            freq_adaptive_coeffs[:, band], norm='ortho')
        # accumulate rather than assign: slices for different bands may
        # address the same linear-scale bins  (NOTE(review): presumed reason
        # for `+=`; confirm bands can overlap)
        linear_coeffs[:, band] += band_coeffs

    return self.short_time_synth.synthesize(linear_coeffs)
def test_should_restore_all_dimensions_in_backward_transform(self):
    # Transform then invert a 3D array: the inverse must have the original
    # shape and all three original dimensions, including the leading time
    # axis that the training data did not have.
    outer_time = TimeDimension(Seconds(1))
    inner_time = TimeDimension(Milliseconds(500))
    freq_dim = FrequencyDimension(LinearScale(FrequencyBand(10, 100), 3))

    training_data = ArrayWithUnits(
        np.zeros((10, 5, 3)),
        dimensions=(IdentityDimension(), inner_time, freq_dim))
    doc = Document(Document.process(l=training_data))

    test_data = ArrayWithUnits(
        np.zeros((11, 5, 3)),
        dimensions=(outer_time, inner_time, freq_dim))
    inverted = doc.pipeline.transform(test_data).inverse_transform()

    self.assertEqual((11, 5, 3), inverted.shape)
    self.assertIsInstance(inverted, ArrayWithUnits)
    self.assertEqual(outer_time, inverted.dimensions[0])
    self.assertEqual(inner_time, inverted.dimensions[1])
    self.assertEqual(freq_dim, inverted.dimensions[2])
def test_can_dequeue_when_reservoir_is_partially_full(self):
    # Only 4 items are enqueued into a 10-sample sampler; dequeuing should
    # return just those 4, with an identity first axis and the remaining
    # two dimensions untouched.
    freq_dim = FrequencyDimension(
        LinearScale(FrequencyBand(100, 1000), 100))
    dimensions = [
        TimeDimension(frequency=Seconds(10)),
        TimeDimension(frequency=Seconds(1)),
        freq_dim,
    ]
    samples = ArrayWithUnits(np.ones((4, 10, 100)), dimensions)

    sampler = ReservoirSampler(nsamples=10)
    sampler._enqueue(samples, pusher=None)
    dequeued = sampler._dequeue()

    self.assertEqual((4, 10, 100), dequeued.shape)
    self.assertIsInstance(dequeued, ArrayWithUnits)
    self.assertEqual(dequeued.dimensions[0], IdentityDimension())
    self.assertEqual(dequeued.dimensions[1], samples.dimensions[1])
    self.assertEqual(dequeued.dimensions[2], samples.dimensions[2])
def test_forward_transform_returns_array_with_units_where_possible(self):
    # fit the model using unit-less random training data
    model_cls = self.get_model()
    training = np.random.random_sample((100, 30))
    model = model_cls(model_cls.process(unitnorm=training))

    # build a time-frequency input for the forward pass
    scale = LinearScale(FrequencyBand(20, 20000), 30)
    dimensions = [TimeDimension(Seconds(1)), FrequencyDimension(scale)]
    data = ArrayWithUnits(np.random.random_sample((10, 30)), dimensions)

    # run it through the learned pipeline
    transformed = model.pipeline.transform(data).data

    # units are kept: time on the first axis, identity on the second
    self.assertIsInstance(transformed, ArrayWithUnits)
    self.assertEqual(2, len(transformed.dimensions))
    self.assertIsInstance(transformed.dimensions[0], TimeDimension)
    self.assertIsInstance(transformed.dimensions[1], IdentityDimension)
def decode(self, d):
    """
    Rebuild a `LinearScale` from its dictionary form.

    `d` must provide the keys 'start_hz', 'stop_hz', 'n_bands' and
    'always_even'; a missing key raises `KeyError`.
    """
    start_hz, stop_hz = d['start_hz'], d['stop_hz']
    frequency_band = FrequencyBand(start_hz, stop_hz)
    return LinearScale(
        frequency_band,
        d['n_bands'],
        always_even=d['always_even'])
def test_can_maintain_array_dimensions_with_supervised_learning(self):
    # Train a small supervised network inside a preprocessing pipeline, then
    # check that an ArrayWithUnits passed through the learned pipeline is
    # still an ArrayWithUnits with its leading time dimension intact.
    trainer = SupervisedTrainer(
        model=SupervisedNetwork(),
        loss=nn.BCELoss(),
        optimizer=lambda model: SGD(model.parameters(), lr=0.2),
        epochs=1,
        batch_size=64,
        data_preprocessor=lambda x: x.astype(np.float32),
        label_preprocessor=lambda x: x.astype(np.float32))

    @simple_in_memory_settings
    class Pipeline(ff.BaseModel):
        inp = ff.PickleFeature(
            ff.IteratorNode,
            store=False)

        samples = ff.PickleFeature(
            ShuffledSamples,
            nsamples=500,
            multiplexed=True,
            dtype=np.float32,
            needs=inp,
            store=False)

        unitnorm = ff.PickleFeature(
            UnitNorm,
            needs=samples.aspect('data'),
            store=False)

        hard_labels = ff.PickleFeature(
            Binarize,
            needs=samples.aspect('labels'),
            store=False)

        network = ff.PickleFeature(
            PyTorchNetwork,
            trainer=trainer,
            needs=dict(data=unitnorm, labels=hard_labels),
            store=False)

        pipeline = ff.PickleFeature(
            PreprocessingPipeline,
            needs=(unitnorm, network),
            store=True)

    # Produce some random points on the unit circle
    samples = np.random.random_sample((1000, 2))
    samples /= np.linalg.norm(samples, axis=1, keepdims=True)

    # a line extending from the origin to (1, 1)
    origin = np.array([0, 0])
    unit = np.array([1, 1])

    # which side of the plane is each sample on?
    labels = np.sign(np.cross(unit - origin, origin - samples))
    labels[labels < 0] = 0

    # scale each sample randomly, forcing the pipeline to normalize data
    factors = np.random.randint(1, 1000, (len(samples), 1))
    scaled_samples = samples * factors

    # fuzz the labels, forcing the pipeline to binarize these (i.e., force
    # them to be 0 or 1)
    fuzzed_labels = labels + np.random.normal(0, 0.1, labels.shape)
    fuzzed_labels = fuzzed_labels[..., None]

    def gen(chunksize, data, targets):
        # `range` rather than `xrange`: identical results here, and it does
        # not raise NameError on Python 3.
        for i in range(0, len(data), chunksize):
            sl = slice(i, i + chunksize)
            yield dict(data=data[sl], labels=targets[sl])

    _id = Pipeline.process(inp=gen(100, scaled_samples, fuzzed_labels))
    pipe = Pipeline(_id)

    # produce some new samples; normalise by their OWN norms (dividing by
    # the norms of the already unit-length `samples` would be a no-op)
    new_samples = np.random.random_sample((1000, 2))
    new_samples /= np.linalg.norm(new_samples, axis=1, keepdims=True)

    # scale each example randomly, so the pipeline must give it unit norm
    # to arrive at the correct answer
    new_factors = np.random.randint(1, 1000, (len(new_samples), 1))
    new_scaled_samples = new_factors * new_samples

    arr = ArrayWithUnits(
        new_scaled_samples,
        dimensions=[
            TimeDimension(Seconds(1)),
            FrequencyDimension(LinearScale(FrequencyBand(100, 1000), 2))
        ])

    result = pipe.pipeline.transform(arr.astype(np.float32))
    self.assertIsInstance(result.data, ArrayWithUnits)
    self.assertIsInstance(result.data.dimensions[0], TimeDimension)