Example No. 1
# Context assumed from the enclosing script: numpy as np, matplotlib.pyplot as
# plt, theanets, the load_cifar/plot_layers/plot_images helpers, a pca()
# helper returning (whiten, color) transforms, and the PCA dimensionality K.
def main(args):
    train, valid, _ = load_cifar()

    whiten, color = pca(train)

    feat = args.features or int(np.sqrt(2 * K))
    e = theanets.Experiment(theanets.Autoencoder((K, feat**2, K)))
    e.train(whiten(train), whiten(valid), input_noise=1, train_batches=313)

    plot_layers([
        color(e.network.find('hid1', 'w').get_value().T).T,
        color(e.network.find('out', 'w').get_value()),
    ], channels=3)
    plt.tight_layout()
    plt.show()

    valid = whiten(valid[:100])
    plot_images(color(valid), 121, 'Sample data', channels=3)
    plot_images(color(e.network.predict(valid)),
                122,
                'Reconstructed data',
                channels=3)
    plt.tight_layout()
    plt.show()
Example No. 2

# time, numpy as np, theanets, and the project-specific load_weights() and
# AutoencoderStructure helpers are assumed from the enclosing module.
def train_net_theano():
    train, valid, test = load_weights()

    print('Training AE')
    start = time.time()

    ae_struct = AutoencoderStructure()
    ae_struct.print_ae_structure()

    net = theanets.Autoencoder(layers=ae_struct.get_ae_structure())

    net.train(
        train,
        valid,
        algo='adadelta',  # alternative: 'rmsprop'
        # patience=.1,
        min_improvement=.01,
        #input_noise=.1,
        train_batches=1000,
        momentum=.9,
        weight_l2=.0001)

    end = time.time()
    print('Reconstruction error:',
          np.linalg.norm(net.decode(net.encode(test)) - test))
    print('Elapsed time: {:.2f}s'.format(end - start))

    return net
Example No. 3

# Assumes `import numpy as np`, `import theanets`, and `import lmj.cubes.fill`
# in the enclosing module.
def fill(dfs, rank, window):
    '''Complete missing marker data using a nonlinear autoencoder model.

    This method alters the given `dfs` in-place.

    Parameters
    ----------
    dfs : list of pd.DataFrame
        Frames of source data. The frames will be stacked into a single large
        frame to use during encoding. This stacked frame will then be split and
        returned.
    rank : int
        Encode the data using a nonlinear matrix decomposition of this rank.
    window : int
        Model windows of this many consecutive frames.
    '''
    df = lmj.cubes.fill.stack(dfs, window)
    centers = lmj.cubes.fill.center(df)
    pos, vis, _ = lmj.cubes.fill.window(df, window)

    d = pos.shape[1]
    net = theanets.Autoencoder((d, (int(rank), 'sigmoid'), d), weighted=True)
    for tm, _ in net.itertrain(
        [pos.astype('f'), vis.astype('f')], batch_size=128, momentum=0.5):
        if tm['loss'] < lmj.cubes.fill.PHASESPACE_TOLERANCE:
            break

    batches = (net.predict(pos[o:o + 64].astype('f'))
               for o in range(0, len(pos), 64))
    lmj.cubes.fill.update(df, np.concatenate(list(batches), axis=0), window)
    lmj.cubes.fill.restore(df, centers)
    lmj.cubes.fill.unstack(df, dfs)
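
A minimal, hypothetical invocation of fill (the frames and shapes below are
illustrative only; real input must follow the marker-column layout that
lmj.cubes.fill expects):

import numpy as np
import pandas as pd

data = np.random.randn(200, 12).astype('f')
data[50:60, 3] = np.nan  # simulate a dropout gap in one marker channel
frames = [pd.DataFrame(data[:100]), pd.DataFrame(data[100:])]

fill(frames, rank=10, window=5)  # completes the gap in-place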
Example No. 4
def test_save_every(tmpdir):
    net = theanets.Autoencoder((u.NUM_INPUTS, (3, 'prelu'), u.NUM_INPUTS))
    p = tmpdir.mkdir('graph-test').join('model.pkl')
    fn = os.path.join(p.dirname, p.basename)
    train = net.itertrain([u.INPUTS], save_every=2, save_progress=fn)
    for i, _ in enumerate(zip(train, range(9))):
        if i == 3 or i == 5 or i == 7:
            assert p.check()
        else:
            assert not p.check()
        if p.check():
            p.remove()
Example No. 5
 def test_save_every(self):
     net = theanets.Autoencoder(
         (self.NUM_INPUTS, (3, 'prelu'), self.NUM_INPUTS))
     f, p = tempfile.mkstemp(suffix='pkl')
     os.close(f)
     os.unlink(p)
     train = net.itertrain([self.INPUTS], save_every=2, save_progress=p)
     for i, _ in enumerate(zip(train, range(9))):
         if i == 3 or i == 5 or i == 7:
             assert os.path.isfile(p)
         else:
             assert not os.path.isfile(p)
         if os.path.exists(p):
             os.unlink(p)
Example No. 6
    def test_params_matching(self):
        net = theanets.Autoencoder([10, 20, 30, 10])

        match = sorted(theanets.util.params_matching(net, '*'))
        assert len(match) == 6
        assert [n for n, _ in match] == [
            'hid1.b', 'hid1.w', 'hid2.b', 'hid2.w', 'out.b', 'out.w'
        ]

        match = sorted(theanets.util.params_matching(net, '*.w'))
        assert len(match) == 3
        assert [n for n, _ in match] == ['hid1.w', 'hid2.w', 'out.w']

        match = sorted(theanets.util.params_matching(net, 'o*.?'))
        assert len(match) == 2
        assert [n for n, _ in match] == ['out.b', 'out.w']
Example No. 7
    def test_outputs_matching(self):
        outputs, _ = theanets.Autoencoder([10, 20, 30, 10]).build_graph()

        match = sorted(theanets.util.outputs_matching(outputs, '*'))
        assert len(match) == 7
        assert [n for n, _ in match] == [
            'hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre', 'in:out',
            'out:out', 'out:pre'
        ]

        match = sorted(theanets.util.outputs_matching(outputs, 'hid?:*'))
        assert len(match) == 4
        assert [n for n, _ in match
                ] == ['hid1:out', 'hid1:pre', 'hid2:out', 'hid2:pre']

        match = sorted(theanets.util.outputs_matching(outputs, '*:pre'))
        assert len(match) == 3
        assert [n for n, _ in match] == ['hid1:pre', 'hid2:pre', 'out:pre']
Example No. 8
def main(args):
    # load up the MNIST digit dataset.
    train, valid, _ = load_mnist()

    net = theanets.Autoencoder([784, args.features**2, 784])
    net.train(train,
              valid,
              input_noise=0.1,
              weight_l2=0.0001,
              algo='rmsprop',
              momentum=0.9,
              min_improvement=0.1)

    plot_layers([net.find('hid1', 'w'), net.find('out', 'w')])
    plt.tight_layout()
    plt.show()

    v = valid[:100]
    plot_images(v, 121, 'Sample data')
    plot_images(net.predict(v), 122, 'Reconstructed data')
    plt.tight_layout()
    plt.show()
Example No. 9

# Assumes the enclosing module imports glob, gzip, io, logging, os, pickle,
# numpy as np, pandas as pd, and theanets.
def compress(source, k, activation, **kwargs):
    fns = sorted(glob.glob(os.path.join(source, '*', '*_jac.csv.gz')))
    logging.info('%s: found %d jacobians', source, len(fns))

    # the clipping operation affects about 2% of jacobian values.
    dfs = [np.clip(pd.read_csv(fn, index_col='time').dropna(), -10, 10)
           for fn in fns]

    B, N = 128, dfs[0].shape[1]

    logging.info('loaded %s rows of %d-D data from %d files',
                 sum(len(df) for df in dfs), N, len(dfs))

    def batch():
        batch = np.zeros((B, N), 'f')
        for b in range(B):
            a = np.random.randint(len(dfs))
            batch[b] = dfs[a].iloc[np.random.randint(len(dfs[a])), :]
        return [batch]

    pca = theanets.Autoencoder([N, (k, activation), (N, 'tied')])
    pca.train(batch, **kwargs)

    key = '{}_k{}'.format(activation, k)
    if 'hidden_l1' in kwargs:
        key += '_s{hidden_l1:.4f}'.format(**kwargs)

    for df, fn in zip(dfs, fns):
        df = pd.DataFrame(pca.encode(df.values.astype('f')), index=df.index)
        s = io.StringIO()
        df.to_csv(s, index_label='time')
        out = fn.replace('_jac', '_jac_' + key)
        with gzip.open(out, 'wb') as handle:
            handle.write(s.getvalue().encode('utf-8'))
        logging.info('%s: saved %s', out, df.shape)

    out = os.path.join(source, 'pca_{}.pkl'.format(key))
    with open(out, 'wb') as handle:
        pickle.dump(pca, handle)
Example No. 10
def learnFeatures(data,
                  encoding_dim=32,
                  activation_function='sigmoid',
                  output_function='sigmoid',
                  kSize=3,
                  NChannelsPerImage=3,
                  lamda_act=(10e-2, 10e-3),
                  lamda_w=(10e-2, 10e-3),
                  nb_epoch=50,
                  batch_size=256):

    N = kSize**2 * NChannelsPerImage

    ae = theanets.Autoencoder(
        [N, (encoding_dim, activation_function), (N, 'tied')])
    ae.train([data], hidden_l1=lamda_act)

    # transpose so each row holds one learned filter, then reshape to
    # (n_filters, channels, height, width)
    weights = ae.find('hid1', 'w').get_value().T
    bias = ae.find('hid1', 'b').get_value()
    weights = np.reshape(weights,
                         (encoding_dim, NChannelsPerImage, kSize, kSize))

    return [weights, bias]
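
A hypothetical call, assuming data holds flattened 3x3 RGB patches, i.e. rows
of length kSize**2 * NChannelsPerImage = 27:

import numpy as np

patches = np.random.rand(1000, 27).astype('f')
filters, bias = learnFeatures(patches, encoding_dim=32)
print(filters.shape)  # (32, 3, 3, 3)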
Example No. 11
 def test_predict(self, layer):
     net = theanets.Autoencoder([NI, NH, NH, layer, NI])
     assert net.predict(u.INPUTS).shape == (u.NUM_EXAMPLES, NI)
Example No. 12
 def test_feed_forward(self):
     net = theanets.Autoencoder((self.NUM_INPUTS, self.l0, self.l))
     out = net.predict(self.INPUTS)
     assert out.shape == (self.NUM_EXAMPLES, self.NUM_INPUTS)
Example No. 13
 def _build(self, *hiddens, **kwargs):
     return theanets.Autoencoder(
         layers=(self.DIGIT_SIZE, ) + hiddens + (self.DIGIT_SIZE, ),
         hidden_activation='logistic',
         **kwargs)
Example No. 14
# vals and vecs are assumed to come from an eigendecomposition of the data
# covariance earlier in the script; keep only the top K components.
vecs = vecs[:, :K]


def whiten(x):
    return np.dot(x, np.dot(vecs, np.diag(1. / vals)))


def color(z):
    return np.dot(z, np.dot(np.diag(vals), vecs.T))
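
# Note: color() undoes whiten() only up to a projection onto the K retained
# components: color(whiten(x)) = x . vecs . vecs.T, which approximately
# reconstructs x. A sanity check, up to floating-point error:
#
#   np.allclose(color(whiten(train)), np.dot(train, np.dot(vecs, vecs.T)))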


# now train our model on the whitened dataset.

N = 20

net = theanets.Autoencoder([K, (N * N, 'linear'), (K, 'tied')])

net.train(whiten(train),
          whiten(valid),
          hidden_l1=0.5,
          weightinverse=1e-6,
          train_batches=300,
          monitors={'hid1:out': (-0.9, -0.1, 0.1, 0.9)})

# color the network weights so they are viewable as digits.
plot_layers([color(net.find('hid1', 'w').get_value().T).T], tied_weights=True)
plt.tight_layout()
plt.show()

plot_images(valid[:N * N], 121, 'Sample data')
plot_images(color(net.predict(whiten(valid[:N * N]))), 122,
            'Reconstructed data')
plt.tight_layout()
plt.show()
Example No. 15
def load_training():
    global __model
    # `the` is assumed to be an alias for theanets (import theanets as the)
    __model = the.Autoencoder([768, (100, 'sigmoid'), (768, 'tied', 'sigmoid')])

    return True
Example No. 16
def load_deep_AE():
    global __model

    __model = the.Autoencoder(layers=(
        768, 400, (100, 'sigmoid'),
        ('tied', 400, 'sigmoid'), ('tied', 768, 'sigmoid')))

    return True
Example No. 17
 def test_mae(self):
     self.exp = theanets.Autoencoder((self.NUM_INPUTS, self.NUM_INPUTS), loss='mae')
     assert self.exp.losses[0].__class__.__name__ == 'MeanAbsoluteError'
     self.assert_progress('sgd', [self.INPUTS])
Example No. 18
 def _build(self, *hiddens, **kwargs):
     return theanets.Autoencoder(
         layers=(784, ) + hiddens + (784, ),
         activation='logistic',
         **kwargs)
Example No. 19
def test_layerwise_tied():
    ae = theanets.Autoencoder([
        u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, (u.NUM_HID1, 'tied'),
        (u.NUM_INPUTS, 'tied')
    ])
    u.assert_progress(ae, u.AE_DATA, algo='layerwise')
Example No. 20
def ae():
    return theanets.Autoencoder(
        [u.NUM_INPUTS, u.NUM_HID1, u.NUM_HID2, u.NUM_INPUTS])
Example No. 21
 def _build(self, *hiddens):
     return theanets.Autoencoder((self.DIGIT_SIZE, ) + hiddens +
                                 (self.DIGIT_SIZE, ))
Example No. 22
 def _build(self, *hiddens):
     return theanets.Autoencoder(
         [self.NUM_INPUTS] + list(hiddens) + [self.NUM_INPUTS])
Example No. 23
 def build(self, *hiddens):
     return theanets.Autoencoder(
         [self.NUM_INPUTS] + list(hiddens) + [self.NUM_INPUTS],
         weighted=True)
Example No. 24

#!/usr/bin/env python

import matplotlib.pyplot as plt
import theanets

from utils import load_mnist, plot_layers, plot_images

train, valid, _ = load_mnist()

net = theanets.Autoencoder(layers=(784, 256, 100, 64, ('tied', 100),
                                   ('tied', 256), ('tied', 784)))
net.train(train,
          valid,
          algo='layerwise',
          patience=1,
          min_improvement=0.05,
          train_batches=100)
net.train(train, valid, min_improvement=0.01, train_batches=100)

plot_layers([net.find(i, 'w') for i in (1, 2, 3)], tied_weights=True)
plt.tight_layout()
plt.show()

valid = valid[0][:100]
plot_images(valid, 121, 'Sample data')
plot_images(net.predict(valid), 122, 'Reconstructed data')
plt.tight_layout()
plt.show()
Example No. 25
 def net(self):
     return theanets.Autoencoder(u.AE_LAYERS)
Example No. 26
 def test_feed_forward(self):
     net = theanets.Autoencoder((Base.INPUTS, self.l0, self.l))
     out = net.predict(np.random.randn(8, Base.INPUTS).astype('f'))
     assert out.shape == (8, Base.INPUTS)
Example No. 27
 def build(self):
     return theanets.Autoencoder([self.NUM_INPUTS, 10, self.NUM_INPUTS])