def main(args):
    train, valid, _ = load_cifar()
    whiten, color = pca(train)
    feat = args.features or int(np.sqrt(2 * K))
    e = theanets.Experiment(theanets.Autoencoder((K, feat ** 2, K)))
    e.train(whiten(train), whiten(valid), input_noise=1, train_batches=313)
    plot_layers([
        color(e.network.find('hid1', 'w').get_value().T).T,
        color(e.network.find('out', 'w').get_value()),
    ], channels=3)
    plt.tight_layout()
    plt.show()
    valid = whiten(valid[:100])
    plot_images(color(valid), 121, 'Sample data', channels=3)
    plot_images(color(e.network.predict(valid)), 122,
                'Reconstructed data', channels=3)
    plt.tight_layout()
    plt.show()

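# The pca() helper used above is not shown. A minimal sketch of what it
# might look like, built from a plain eigendecomposition of the data
# covariance (K and np come from the snippet above; the implementation,
# including the lack of mean-centering in whiten(), is an assumption):

def pca(data):
    vals, vecs = np.linalg.eigh(np.cov(data.T))
    # keep the K leading components; eigh sorts ascending, so reverse.
    vals = np.sqrt(vals[::-1][:K])
    vecs = vecs[:, ::-1][:, :K]

    def whiten(x):
        return np.dot(x, np.dot(vecs, np.diag(1. / vals)))

    def color(z):
        return np.dot(z, np.dot(np.diag(vals), vecs.T))

    return whiten, color
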
def train_net_theano():
    train, valid, test = load_weights()
    print('Training AE')
    start = time.time()
    ae_struct = AutoencoderStructure()
    ae_struct.print_ae_structure()
    net = theanets.Autoencoder(layers=ae_struct.get_ae_structure())
    net.train(
        train,
        valid,
        algo='adadelta',  # rmsprop
        # patience=.1,
        min_improvement=.01,
        # input_noise=.1,
        train_batches=1000,
        momentum=.9,
        weight_l2=.0001)
    end = time.time()
    # reconstruction error on the held-out test set.
    print(np.linalg.norm(net.decode(net.encode(test)) - test))
    print('Elapsed time: ')
    print(round(end - start, 2))
    return net

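# AutoencoderStructure is a project-local helper that is not shown in this
# snippet. A hypothetical minimal version, assuming its only job is to hand
# theanets a layer specification (the sizes below are placeholders):

class AutoencoderStructure:
    def __init__(self, layers=(768, 256, 64, 256, 768)):
        self.layers = tuple(layers)

    def get_ae_structure(self):
        return self.layers

    def print_ae_structure(self):
        print('autoencoder layers:', self.layers)
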
def fill(dfs, rank, window):
    '''Complete missing marker data using a nonlinear autoencoder model.

    This method alters the given `dfs` in-place.

    Parameters
    ----------
    dfs : list of pd.DataFrame
        Frames of source data. The frames will be stacked into a single
        large frame to use during encoding. This stacked frame will then
        be split and returned.
    rank : int
        Encode the data using a nonlinear matrix decomposition of this rank.
    window : int
        Model windows of this many consecutive frames.
    '''
    df = lmj.cubes.fill.stack(dfs, window)
    centers = lmj.cubes.fill.center(df)
    pos, vis, _ = lmj.cubes.fill.window(df, window)
    d = pos.shape[1]
    net = theanets.Autoencoder((d, (int(rank), 'sigmoid'), d), weighted=True)
    for tm, _ in net.itertrain([pos.astype('f'), vis.astype('f')],
                               batch_size=128, momentum=0.5):
        if tm['loss'] < lmj.cubes.fill.PHASESPACE_TOLERANCE:
            break
    batches = (net.predict(pos[o:o + 64].astype('f'))
               for o in range(0, len(pos), 64))
    lmj.cubes.fill.update(df, np.concatenate(list(batches), axis=0), window)
    lmj.cubes.fill.restore(df, centers)
    lmj.cubes.fill.unstack(df, dfs)

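# A hedged usage sketch for fill(): the frames are completed in-place, so
# there is no return value to capture. The data layout below is an
# assumption; lmj.cubes defines the real conventions for marker frames.
#
#     dfs = [trial.df for trial in experiment.trials]  # hypothetical source
#     fill(dfs, rank=64, window=5)                     # alters dfs in-place
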
def test_save_every(tmpdir):
    net = theanets.Autoencoder((u.NUM_INPUTS, (3, 'prelu'), u.NUM_INPUTS))
    p = tmpdir.mkdir('graph-test').join('model.pkl')
    fn = os.path.join(p.dirname, p.basename)
    train = net.itertrain([u.INPUTS], save_every=2, save_progress=fn)
    for i, _ in enumerate(zip(train, range(9))):
        if i in (3, 5, 7):
            assert p.check()
        else:
            assert not p.check()
        if p.check():
            p.remove()

def test_save_every(self):
    net = theanets.Autoencoder(
        (self.NUM_INPUTS, (3, 'prelu'), self.NUM_INPUTS))
    f, p = tempfile.mkstemp(suffix='pkl')
    os.close(f)
    os.unlink(p)
    train = net.itertrain([self.INPUTS], save_every=2, save_progress=p)
    for i, _ in enumerate(zip(train, range(9))):
        if i in (3, 5, 7):
            assert os.path.isfile(p)
        else:
            assert not os.path.isfile(p)
        if os.path.exists(p):
            os.unlink(p)

def test_params_matching(self):
    net = theanets.Autoencoder([10, 20, 30, 10])
    match = sorted(theanets.util.params_matching(net, '*'))
    assert len(match) == 6
    assert [n for n, _ in match] == [
        'hid1.b', 'hid1.w', 'hid2.b', 'hid2.w', 'out.b', 'out.w']
    match = sorted(theanets.util.params_matching(net, '*.w'))
    assert len(match) == 3
    assert [n for n, _ in match] == ['hid1.w', 'hid2.w', 'out.w']
    match = sorted(theanets.util.params_matching(net, 'o*.?'))
    assert len(match) == 2
    assert [n for n, _ in match] == ['out.b', 'out.w']

def test_outputs_matching(self):
    outputs, _ = theanets.Autoencoder([10, 20, 30, 10]).build_graph()
    match = sorted(theanets.util.outputs_matching(outputs, '*'))
    assert len(match) == 7
    assert [n for n, _ in match] == [
        'hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre',
        'in:out', 'out:out', 'out:pre']
    match = sorted(theanets.util.outputs_matching(outputs, 'hid?:*'))
    assert len(match) == 4
    assert [n for n, _ in match] == [
        'hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre']
    match = sorted(theanets.util.outputs_matching(outputs, '*:pre'))
    assert len(match) == 3
    assert [n for n, _ in match] == ['hid1:pre', 'hid2:pre', 'out:pre']

def main(args):
    # load up the MNIST digit dataset.
    train, valid, _ = load_mnist()
    net = theanets.Autoencoder([784, args.features ** 2, 784])
    net.train(train, valid,
              input_noise=0.1,
              weight_l2=0.0001,
              algo='rmsprop',
              momentum=0.9,
              min_improvement=0.1)
    plot_layers([net.find('hid1', 'w'), net.find('out', 'w')])
    plt.tight_layout()
    plt.show()
    v = valid[:100]
    plot_images(v, 121, 'Sample data')
    plot_images(net.predict(v), 122, 'Reconstructed data')
    plt.tight_layout()
    plt.show()

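# The script above expects an argparse-style namespace with a `features`
# attribute. A minimal driver under that assumption (the flag name and
# default are guesses, not from the source):

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--features', type=int, default=16,
                        help='hidden layer will have features**2 units')
    main(parser.parse_args())
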
def compress(source, k, activation, **kwargs):
    fns = sorted(glob.glob(os.path.join(source, '*', '*_jac.csv.gz')))
    logging.info('%s: found %d jacobians', source, len(fns))
    # the clipping operation affects about 2% of jacobian values.
    dfs = [np.clip(pd.read_csv(fn, index_col='time').dropna(), -10, 10)
           for fn in fns]
    B, N = 128, dfs[0].shape[1]
    logging.info('loaded %s rows of %d-D data from %d files',
                 sum(len(df) for df in dfs), N, len(dfs))

    def batch():
        batch = np.zeros((B, N), 'f')
        for b in range(B):
            a = np.random.randint(len(dfs))
            batch[b] = dfs[a].iloc[np.random.randint(len(dfs[a])), :]
        return [batch]

    pca = theanets.Autoencoder([N, (k, activation), (N, 'tied')])
    pca.train(batch, **kwargs)

    key = '{}_k{}'.format(activation, k)
    if 'hidden_l1' in kwargs:
        key += '_s{hidden_l1:.4f}'.format(**kwargs)

    for df, fn in zip(dfs, fns):
        df = pd.DataFrame(pca.encode(df.values.astype('f')), index=df.index)
        s = io.StringIO()
        df.to_csv(s, index_label='time')
        out = fn.replace('_jac', '_jac_' + key)
        with gzip.open(out, 'wb') as handle:
            handle.write(s.getvalue().encode('utf-8'))
        logging.info('%s: saved %s', out, df.shape)

    out = os.path.join(source, 'pca_{}.pkl'.format(key))
    with open(out, 'wb') as handle:
        pickle.dump(pca, handle)

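# A hedged usage sketch for compress(): extra keyword arguments are passed
# straight through to theanets' train(), so a sparsity penalty such as
# hidden_l1 can be supplied at the call site and is folded into the output
# filename key. The directory name is a placeholder.
#
#     compress('jacobians', k=32, activation='relu', hidden_l1=0.01)
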
def learnFeatures(data, encoding_dim=32, activation_function='sigmoid',
                  output_function='sigmoid', kSize=3, NChannelsPerImage=3,
                  lamda_act=(10e-2, 10e-3), lamda_w=(10e-2, 10e-3),
                  nb_epoch=50, batch_size=256):
    N = kSize ** 2 * NChannelsPerImage
    ae = theanets.Autoencoder([N, (encoding_dim, 'sigmoid'), (N, 'tied')])
    # theanets expects a scalar weight for the hidden_l1 regularizer, so use
    # the first element of the (start, end) pair.
    ae.train([data], hidden_l1=lamda_act[0])
    # the hidden weight matrix has shape (N, encoding_dim); transpose so each
    # row is one learned feature, then reshape into image-patch form.
    weights = ae.find('hid1', 'w').get_value().T
    bias = ae.find('hid1', 'b').get_value()
    weights = np.reshape(weights,
                         (encoding_dim, NChannelsPerImage, kSize, kSize))
    return [weights, bias]

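# A hedged usage sketch for learnFeatures(), assuming `data` holds flattened
# kSize x kSize patches with NChannelsPerImage channels; the synthetic array
# below is a stand-in for real image patches.

patches = np.random.randn(1000, 3 * 3 * 3).astype('f')
weights, bias = learnFeatures(patches, encoding_dim=32,
                              kSize=3, NChannelsPerImage=3)
print(weights.shape)  # (32, 3, 3, 3)
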
def test_predict(self, layer):
    net = theanets.Autoencoder([NI, NH, NH, layer, NI])
    assert net.predict(u.INPUTS).shape == (u.NUM_EXAMPLES, NI)

def test_feed_forward(self):
    net = theanets.Autoencoder((self.NUM_INPUTS, self.l0, self.l))
    out = net.predict(self.INPUTS)
    assert out.shape == (self.NUM_EXAMPLES, self.NUM_INPUTS)

def _build(self, *hiddens, **kwargs):
    return theanets.Autoencoder(
        layers=(self.DIGIT_SIZE,) + hiddens + (self.DIGIT_SIZE,),
        hidden_activation='logistic', **kwargs)

vecs = vecs[:, :K]

def whiten(x):
    return np.dot(x, np.dot(vecs, np.diag(1. / vals)))

def color(z):
    return np.dot(z, np.dot(np.diag(vals), vecs.T))

# now train our model on the whitened dataset.
N = 20
net = theanets.Autoencoder([K, (N * N, 'linear'), (K, 'tied')])
net.train(whiten(train), whiten(valid),
          hidden_l1=0.5,
          weight_inverse=1e-6,
          train_batches=300,
          monitors={'hid1:out': (-0.9, -0.1, 0.1, 0.9)})

# color the network weights so they are viewable as digits.
plot_layers([color(net.find('hid1', 'w').get_value().T).T],
            tied_weights=True)
plt.tight_layout()
plt.show()

plot_images(valid[:N * N], 121, 'Sample data')
plot_images(color(net.predict(whiten(valid[:N * N]))), 122,
            'Reconstructed data')
plt.tight_layout()
plt.show()

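# The snippet above begins mid-script: K, vals, vecs, train, and valid are
# assumed to exist already. A plausible setup consistent with the whiten()
# and color() definitions (K = 256 is a placeholder, not from the source):

K = 256
train, valid, _ = load_mnist()
vals, vecs = np.linalg.eigh(np.cov(train.T))
vals = np.sqrt(vals[::-1])[:K]  # top-K singular values, largest first
vecs = vecs[:, ::-1]            # eigenvectors in matching order
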
def load_training():
    global __model
    __model = the.Autoencoder(
        [768, (100, 'sigmoid'), (768, 'tied', 'sigmoid')])
    return True

def load_deep_AE():
    global __model
    __model = the.Autoencoder(layers=(
        768, 400, (100, 'sigmoid'),
        ('tied', 400, 'sigmoid'),
        ('tied', 768, 'sigmoid')))
    return True

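# Both loaders above leave an untrained model in the module-global __model.
# A hedged sketch of a round-trip helper built on theanets' encode/decode
# (the helper name and usage are assumptions, not from the source):

def reconstruct(x):
    # x: float array of shape (n, 768), matching the input layer above.
    return __model.decode(__model.encode(x.astype('f')))
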
def test_mae(self):
    self.exp = theanets.Autoencoder(
        (self.NUM_INPUTS, self.NUM_INPUTS), loss='mae')
    assert self.exp.losses[0].__class__.__name__ == 'MeanAbsoluteError'
    self.assert_progress('sgd', [self.INPUTS])

def _build(self, *hiddens, **kwargs):
    return theanets.Autoencoder(
        layers=(784,) + hiddens + (784,),
        activation='logistic', **kwargs)

def test_layerwise_tied():
    ae = theanets.Autoencoder([
        u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2,
        (u.NUM_HID1, 'tied'), (u.NUM_INPUTS, 'tied')])
    u.assert_progress(ae, u.AE_DATA, algo='layerwise')

def ae():
    return theanets.Autoencoder(
        [u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, u.NUM_INPUTS])

def _build(self, *hiddens):
    return theanets.Autoencoder(
        (self.DIGIT_SIZE,) + hiddens + (self.DIGIT_SIZE,))

def _build(self, *hiddens):
    return theanets.Autoencoder(
        [self.NUM_INPUTS] + list(hiddens) + [self.NUM_INPUTS])

def build(self, *hiddens):
    return theanets.Autoencoder(
        [self.NUM_INPUTS] + list(hiddens) + [self.NUM_INPUTS],
        weighted=True)

#!/usr/bin/env python

import matplotlib.pyplot as plt
import theanets

from utils import load_mnist, plot_layers, plot_images

train, valid, _ = load_mnist()

net = theanets.Autoencoder(
    layers=(784, 256, 100, 64, ('tied', 100), ('tied', 256), ('tied', 784)))
net.train(train, valid,
          algo='layerwise',
          patience=1,
          min_improvement=0.05,
          train_batches=100)
net.train(train, valid, min_improvement=0.01, train_batches=100)

plot_layers([net.find(i, 'w') for i in (1, 2, 3)], tied_weights=True)
plt.tight_layout()
plt.show()

valid = valid[0][:100]
plot_images(valid, 121, 'Sample data')
plot_images(net.predict(valid), 122, 'Reconstructed data')
plt.tight_layout()
plt.show()

def net(self):
    return theanets.Autoencoder(u.AE_LAYERS)

def test_feed_forward(self):
    net = theanets.Autoencoder((Base.INPUTS, self.l0, self.l))
    out = net.predict(np.random.randn(8, Base.INPUTS).astype('f'))
    assert out.shape == (8, Base.INPUTS)

def build(self):
    return theanets.Autoencoder([self.NUM_INPUTS, 10, self.NUM_INPUTS])