class BarkKmeans(ff.BaseModel):
    """
    Learning graph that shuffles incoming samples, applies a unit-norm step
    and a 128-centroid k-means step, and bundles the last two into a stored
    preprocessing pipeline.
    """

    # root of the graph: an iterator node, declared with store=False
    docs = ff.Feature(ff.IteratorNode, store=False)

    # ShuffledSamples over docs, configured for one million samples; stored
    shuffle = ff.NumpyFeature(
        zounds.ShuffledSamples,
        nsamples=int(1e6),
        needs=docs,
        store=True)

    # unit-norm step fed by the shuffled samples
    unitnorm = ff.PickleFeature(
        zounds.UnitNorm,
        needs=shuffle,
        store=False)

    # k-means with 128 centroids, fed by the unit-norm step
    kmeans = ff.PickleFeature(
        zounds.KMeans,
        centroids=128,
        needs=unitnorm,
        store=False)

    # the stored pipeline combines the unit-norm and k-means steps, in order
    pipeline = ff.PickleFeature(
        zounds.PreprocessingPipeline,
        needs=(unitnorm, kmeans),
        store=True)
class FreqAdaptiveAutoEncoder(ff.BaseModel):
    """
    Processing graph that learns a compressed representation of the
    Sound.freq_adaptive feature.

    After training, the stored pipeline lets every pre-processing step and
    the autoencoder be applied both forward and in reverse.
    """

    # root of the graph: an iterator node
    docs = ff.Feature(ff.IteratorNode)

    # ShuffledSamples over docs: 500000 samples, float32 dtype
    shuffle = ff.PickleFeature(
        zounds.ShuffledSamples,
        nsamples=500000,
        dtype=np.float32,
        needs=docs)

    # mu-law compression step, fed by the shuffled samples
    mu_law = ff.PickleFeature(zounds.MuLawCompressed, needs=shuffle)

    # instance scaling step, fed by the mu-law step
    scaled = ff.PickleFeature(zounds.InstanceScaling, needs=mu_law)

    # autoencoder trained with MSE loss and Adam (lr=0.00005) for 100 epochs,
    # batch size 64, with holdout_percent=0.5
    autoencoder = ff.PickleFeature(
        zounds.PyTorchAutoEncoder,
        trainer=zounds.SupervisedTrainer(
            AutoEncoder(),
            loss=nn.MSELoss(),
            optimizer=lambda model:
                optim.Adam(model.parameters(), lr=0.00005),
            epochs=100,
            batch_size=64,
            holdout_percent=0.5),
        needs=scaled)

    # bundle the steps above into one re-usable, stored pipeline that can run
    # the transformations forward and backward
    pipeline = ff.PickleFeature(
        zounds.PreprocessingPipeline,
        needs=(mu_law, scaled, autoencoder),
        store=True)
class InfiniteLearningPipeline(cls):
    """
    Extension of the enclosing ``cls`` that adds an InfiniteSampler-backed
    ``dataset`` feature and a stored ``pipeline`` built from ``cls.features``.

    NOTE(review): ``cls`` is bound outside this block (presumably the class
    being wrapped by an enclosing factory) -- confirm against the caller.
    """

    # sampler settings are supplied through ff.Var placeholders
    dataset = ff.Feature(
        InfiniteSampler,
        nsamples=ff.Var('nsamples'),
        dtype=ff.Var('dtype'),
        feature_filter=ff.Var('feature_filter'),
        parallel=ff.Var('parallel'))

    # stored pipeline assembled from the features declared on the base class
    pipeline = ff.ClobberPickleFeature(
        PreprocessingPipeline,
        needs=cls.features,
        store=True)

    @classmethod
    def load_network(cls):
        """
        Return the ``network`` attribute of the first pipeline step that has
        one.

        Raises:
            RuntimeError: if ``cls.exists()`` is false, or if no step in the
                pipeline exposes a ``network`` attribute.
        """
        if not cls.exists():
            raise RuntimeError('No network has been trained or saved')

        # private sentinel, so a step whose network is None is still returned
        absent = object()
        for stage in cls().pipeline:
            network = getattr(stage, 'network', absent)
            if network is not absent:
                return network

        raise RuntimeError('There is no network in the pipeline')
class Corpus(ff.BaseModel):
    """
    Processing graph for corpus-level features, including whether and how
    each of those features is persisted.
    """

    # lazily yields the `counts` attribute of each item obtained by iterating
    # the supplied input; declared with store=False
    docs = ff.Feature(
        lambda doc_cls: (document.counts for document in doc_cls),
        store=False)

    # WordCount applied over the per-document counts; stored as JSON
    total_counts = ff.JSONFeature(WordCount, needs=docs, store=True)
class DctKmeansWithLogAmplitude(ff.BaseModel):
    """
    A pipeline that applies a logarithmic weighting to the magnitudes of the
    spectrum before learning centroids.
    """

    # root of the graph: an iterator node, declared with store=False
    docs = ff.Feature(
        ff.IteratorNode,
        store=False)

    # randomize the order of the data; nsamples is a sample count, so it is
    # passed as an integer rather than the float literal 1e6
    shuffle = ff.NumpyFeature(
        zounds.ReservoirSampler,
        nsamples=int(1e6),
        needs=docs,
        store=True)

    # logarithmic weighting of the sampled data
    log = ff.PickleFeature(
        zounds.Log,
        needs=shuffle,
        store=False)

    # give each frame unit norm, since we care about the shape of the spectrum
    # and not its magnitude
    unit_norm = ff.PickleFeature(
        zounds.UnitNorm,
        needs=log,
        store=False)

    # learn 512 centroids, or basis functions
    kmeans = ff.PickleFeature(
        zounds.KMeans,
        centroids=512,
        needs=unit_norm,
        store=False)

    # assemble the previous steps into a re-usable pipeline, which can perform
    # forward and backward transformations
    pipeline = ff.PickleFeature(
        zounds.PreprocessingPipeline,
        needs=(log, unit_norm, kmeans),
        store=True)
class Rbm(featureflow.BaseModel, Settings):
    """
    Small learning graph: reservoir-sample 1000 items, apply unit-norm and
    mean/std normalization, fit a 3-centroid step, and bundle the three
    processing steps into a stored preprocessing pipeline.
    """

    # root of the graph, declared with store=False
    iterator = featureflow.Feature(Iterator, store=False)

    # ReservoirSampler over the iterator, configured for 1000 samples; stored
    shuffle = featureflow.NumpyFeature(
        ReservoirSampler,
        nsamples=1000,
        needs=iterator,
        store=True)

    unitnorm = featureflow.PickleFeature(
        UnitNorm,
        needs=shuffle,
        store=False)

    meanstd = featureflow.PickleFeature(
        MeanStdNormalization,
        needs=unitnorm,
        store=False)

    # NOTE(review): despite the attribute name, this step wraps KMeans
    # (3 centroids) -- confirm that is intended
    rbm = featureflow.PickleFeature(
        KMeans,
        centroids=3,
        needs=meanstd,
        store=False)

    # stored pipeline combining the three steps above, in order
    pipeline = featureflow.PickleFeature(
        PreprocessingPipeline,
        needs=(unitnorm, meanstd, rbm),
        store=True)
class Document(ff.BaseModel):
    """
    Processing graph for document-level features, including whether and how
    each of those features is persisted.
    """

    # raw byte stream, read with chunksize=128; stored
    raw = ff.ByteStreamFeature(ff.ByteStream, chunksize=128, store=True)

    # CheckSum computed from the raw stream; stored as JSON
    checksum = ff.JSONFeature(CheckSum, needs=raw, store=True)

    # Tokenizer applied to the raw stream; declared with store=False
    tokens = ff.Feature(Tokenizer, needs=raw, store=False)

    # WordCount computed from the tokens; stored as JSON
    counts = ff.JSONFeature(WordCount, needs=tokens, store=True)
class DctKmeans(ff.BaseModel):
    """
    A pipeline that does example-wise normalization by giving each example
    unit-norm, and learns 512 centroids from those examples.
    """

    # root of the graph: an iterator node, declared with store=False
    docs = ff.Feature(
        ff.IteratorNode,
        store=False)

    # randomize the order of the data; nsamples is a sample count, so it is
    # passed as an integer rather than the float literal 1e6
    shuffle = ff.NumpyFeature(
        zounds.ReservoirSampler,
        nsamples=int(1e6),
        needs=docs,
        store=True)

    # give each frame unit norm, since we care about the shape of the spectrum
    # and not its magnitude
    unit_norm = ff.PickleFeature(
        zounds.UnitNorm,
        needs=shuffle,
        store=False)

    # learn 512 centroids, or basis functions
    kmeans = ff.PickleFeature(
        zounds.KMeans,
        centroids=512,
        needs=unit_norm,
        store=False)

    # assemble the previous steps into a re-usable pipeline, which can perform
    # forward and backward transformations
    pipeline = ff.PickleFeature(
        zounds.PreprocessingPipeline,
        needs=(unit_norm, kmeans),
        store=True)